...
 
Commits (3)
......@@ -17,7 +17,7 @@ Description: Provides a simulation framework to simulate streamflow time series
streamflow values and their temporal correlation as expressed by short- and long-range
dependence. The approach is based on the randomization of the phases of the Fourier
transform. We further use the flexible four-parameter Kappa distribution, which allows
for the extrapolation to yet unobserved low and high flows. A detailed description of
for the extrapolation to yet unobserved low and high flows. Alternatively, the empirical or any other distribution can be used. A detailed description of
the simulation approach and an application example can be found
in https://www.hydrol-earth-syst-sci-discuss.net/hess-2019-142/.
URL: https://git.math.uzh.ch/reinhard.furrer/PRSim-devel
......
importFrom("homtest", "Lmoments", "rand.kappa", "par.kappa", "F.kappa")
importFrom("stats", "rnorm", "runif", "fft", "ks.test")
<<<<<<< HEAD
importFrom("graphics", "hist")
=======
importFrom("goftest", "ad.test")
>>>>>>> f0241bb562c45952cb5f9f249dbd9b2585c469b0
export("prsim")
prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal=c("kappa","empirical"), marginalpar=TRUE, GoFtest=NULL,
verbose=TRUE, suppWarn=FALSE, ...){
marginal=c("kappa","empirical",CDF),n_par=4,
verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=c(NULL, "AD", "KS"), ...){
ifft <- function (x) fft(x, inverse = TRUE)/length(x)
......@@ -45,6 +45,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
data$timestamp <- as.POSIXct(strptime(tmp, format="%Y %m %d", tz="GMT"))
}
### test whether r_CDF, p_CDF, and CDF_fit are properly defined.
### @Reinhard: could you please implement these tests?
### remove February 29
data <- data[format(data$timestamp, "%m %d") != "02 29",]
......@@ -95,8 +97,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### To enable a sufficient sample size by using daily values in moving window around day i (i.e., reduce uncertainty due to fitting)
if(marginal=="kappa"){
p_vals <- numeric(365)
kap_par_day <- matrix(0, nrow=365, ncol=4)
density_kap <- list()
par_day <- matrix(0, nrow=365, ncol=4)
# density_kap <- list()
### define window length
win_length <- c(1:win_h_length)
for(d in c(1:365)){
......@@ -119,7 +121,7 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
if(length(test)>1){
kap_par <- test
kap_par_day[d,] <- unlist(kap_par)
par_day[d,] <- unlist(kap_par)
### define vector of quantiles
quant <- sort(data_window)
thresh <- kap_par$xi + kap_par$alfa*(1 - kap_par$h^(-kap_par$k))/kap_par$k
......@@ -134,35 +136,77 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
# density_kap[[d]] <- density(data_kap)
# hist(data_window)
# hist(data_kap,add=T,col="red")
if (GoFtest=="KS")
p_vals[d] <- ks.test(data_window, data_kap)$p.value ### kappa distribution not rejected at alpha=0.05
if (GoFtest=="AD")
p_vals[d] <- ad.test(data_window, F.kappa, xi=kap_par$xi,alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h )$p.value ### NOTE: TRY
if (tolower(GoFtest)=="ks")
p_vals[d] <- ks.test(data_window, data_kap)$p.value ### kappa distribution not rejected at alpha=0.05
if (tolower(GoFtest)=="ad")
if(length(try(ad.test(data_window,F.kappa,xi=kap_par$xi,alfa=kap_par$alfa,k=kap_par$k,h=kap_par$h)))==1){
p_vals[d] <- NA
}else{
p_vals[d] <- ad.test(data_window, F.kappa, xi=kap_par$xi, alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h)$p.value
}
} else{
if(d==1){
p_vals[d] <- NA
kap_par_day[d,] <- NA
par_day[d,] <- NA
}else{
p_vals[d] <- p_vals[d-1]
kap_par_day[d,] <- kap_par_day[d-1,]
par_day[d,] <- par_day[d-1,]
}
}
}
###qu: check if subsequent NAs work properly.
### replace NA entries by values of subsequent day
if(length(which(is.na(kap_par_day[,1])))>0){
indices <- rev(which(is.na(kap_par_day[,1])))
if(length(which(is.na(par_day[,1])))>0){
indices <- rev(which(is.na(par_day[,1])))
for(i in 1:length(indices)){
kap_par_day[indices[i],] <- kap_par_day[indices[i]+1,]
par_day[indices[i],] <- par_day[indices[i]+1,]
}
}
}
# which(unlist(p_val_list)<0.05) ### non-rejected for most days, difficulty with winter months (relation to hp production?)
### use either a predefined distribution in R or define own function
if(marginal!=c("kappa","empirical")){
p_vals <- numeric(365)
par_day <- matrix(0, nrow=365, ncol=n_par)
for(d in c(1:365)){
### define window length
win_length <- seq(1:15)
### define start and end of window
before <- data$index[d+365-win_length]
after <- data$index[d+365+win_length-1]
### define days within window
ids <- c(before,after)
### determine values in window around day i
data_window <- data$Qobs[which(data$index%in%ids)]
### fit generalized gamma distribution
# fitdistr(x=data_window,dgengamma.stacy,start=list("scale"=1,"k"=6,"d"=2)) ### package VGAM: fitting does not work
### use function from package felxsurv
# theta <- CDF.fit(xdat=data_window)
theta <- match.fun(paste(CDF,"_fit",sep=""))(xdat=data_window)
# do.call(paste(CDF,".fit",sep=""),list(xdat=data_window))
### goodness of fit test
# data_random <- rCDF(n=length(data_window),theta=theta)
data_random <- match.fun(paste("r_",CDF,sep=""))(n=length(data_window),theta=theta)
# density_gengam[[d]] <- density(data_gengam)
hist(data_window)
hist(data_random,add=T,col="red")
if (tolower(GoFtest)=="ks"){
p_vals[d] <- ks.test(data_window,data_random)$p.value
}
if (tolower(GoFtest)=="ad"){
p_vals[d] <- ad.test(data_window,match.fun(paste("p_",CDF,sep="")),theta)$p.value
}
### store parameters
par_day[d,] <- theta
}
}
### Deseasonalization was not found to be necessary. Use normalized data directly.
data$des <- data$norm
......@@ -263,13 +307,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
### use kappa distribution for backtransformation
if(marginal=="kappa"){
colnames(kap_par_day) <- names(kap_par)
colnames(par_day) <- names(kap_par)
### use monthly Kappa distribution for backtransformation
### simulate random sample of size n from Kappa disribution
data_day$kappa <- rand.kappa(length(data_day$Qobs),
xi=kap_par_day[d,"xi"],alfa=kap_par_day[d,"alfa"],
k=kap_par_day[d,"k"],h=kap_par_day[d,"h"])
xi=par_day[d,"xi"],alfa=par_day[d,"alfa"],
k=par_day[d,"k"],h=par_day[d,"h"])
# if(marginal=="wakeby"){
# ### use Wakeby distribution for backtrasformation
......@@ -309,6 +353,25 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$Qobs[data_new$rank[which(data$index%in%c(d))]]
# }
}
### use any predefined distribution for backtransformation
if(marginal!=c("kappa","empirical")){
### use monthly distribution for backtransformation
### simulate random sample of size n from disribution
data_day$cdf <- match.fun(paste("r_",CDF,sep=""))(n=length(data_day$Qobs),par_day[d,])
data_day$rank <- rank(data_day$cdf)
data_new$rank <- rank(data_new$seasonal)
# hist(data_day$Qobs)
# hist(data_day$cdf,add=T,col="blue")
# data_day$rank <- rank(data_day$cdf)
data_new$rank[which(data$index%in%c(d))] <- rank(data_new[which(data$index%in%c(d)),]$seasonal)
### derive corresponding values from the kappa distribution
### identify value corresponding to rank in the kappa time series
data_ordered <- data_day[order(data_day$rank),]
data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$cdf[data_new$rank[which(data$index%in%c(d))]]
}
} # end for loop
data_sim[[r]] <- data_new$simulated_seasonal
......@@ -325,6 +388,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
data_sim)
if (is.null(GoFtest)) {
p_vals <- NULL
}
if(marginal=="kappa"){
if (marginalpar) { # also return intermediate results
return(list( simulation=data_stoch, pars=par_day, p_val=p_vals))
p_vals <- NULL
}
......@@ -335,6 +405,6 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
return(list( simulation=data_stoch, pars=NULL, p_val=p_vals))
}
}else{
return(list( simulation=data_stoch) )
return(list(simulation=data_stoch) )
}
}
rCDF <- function(n, theta) {
rnorm(n, theta[1], sqrt(theta[2]))
}
CDF.fit <- function(xdat,...) { gev.fit(xdat,..., show=FALSE)$mle}
#data(portpirie)
#gev.fttt(portpirie[,2])
\ No newline at end of file
......@@ -2,8 +2,40 @@
data(runoff)
out <- prsim(data=runoff, number_sim=1, marginal="empirical")
out <- prsim(data=runoff, number_sim=1, marginal="kappa")
out <- prsim(data=runoff, number_sim=1, marginal="kappa",GoFtest = "KS")
### GEV distribution
r_gev <- function(n, theta){
rgev(n, theta[1], theta[2], theta[3])
}
p_gev <- function(x, theta){
pgev(x, theta[1], theta[2], theta[3])
}
gev_fit <- function( xdat, ...) {
gev.fit( xdat, ...)$mle
}
### generalized Pareto distribution of the second kind
r_gb2 <- function(n, theta){
rgb2(n, theta[1], theta[2], theta[3], theta[4])
}
p_gb2 <- function(x, theta){
pgb2(x, theta[1], theta[2], theta[3], theta[4])
}
gb2_fit <- function( xdat, ...) {
mlfit.gb2( xdat, ...)[[2]]$par
}
### @ Reinhard: This is not very elegant yet. CDF needs to be specified before giving it to the function.
### I guess this can be alternatively resolved by using a different argument for marginal. Any ideas?
CDF <-"gev"
out <- prsim(data=runoff, number_sim=1, marginal=CDF,GoFtest = "KS",n_par=3)
CDF <-"gb2"
out <- prsim(data=runoff, number_sim=1, marginal="gb2",GoFtest = "KS",n_par=4)
sim <- out$simulation
# p_val <- out$p_val
par(mai=c(.9,.9,.1,.1))
plot(sim$timestamp[1:1000], sim$Qobs[1:1000], type="l",
xlab="Time [d]", ylab=expression(paste("Discharge [m"^3,"/s]")))
......
......@@ -6,19 +6,22 @@
Applies the algorithm to a single station
}
\usage{
prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal=c("kappa","empirical"), marginalpar=TRUE, GoFtest=NULL,
verbose=TRUE, suppWarn=FALSE, ...)
<<<<<<< HEAD
prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,marginal="kappa",n_par=4,
verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=FALSE, ...)
}
\arguments{
\item{data}{data frame containing the time indications and runoff of at least one station. See \sQuote{Details}.}
\item{station_id}{identifies the station in case several runoffs are present in \code{data}. See \sQuote{Details}.}
\item{number_sim}{number of simulations to be carried out.}
\item{marginal}{marginal distribution to be used for the backtransformation. Can be either "kappa", "empirical", or CDF. "kappa" uses the four-parameter kappa distribution for backtransformation, "empirical" uses the empirical distribution. CDF allows for specifying any distribution \sQuote{Examples}.}
\item{n_par}{number of parameters of the marginal distribution used}
\item{win_h_length}{(half-)length of moving window size.}
\item{marginal}{marginal distribution to be used for the backtransformation, see \sQuote{Details}.}
\item{verbose}{logical. Should progress be reported?}
\item{marginalpar}{logical. Should the estimated parameters of the kappa distribution be returned?}
\item{marginalpar}{logical. Should the estimated parameters of the distribution used be returned?}
\item{suppWarn}{logical. See \sQuote{Details}.}
\item{GoFtest}{NULL, KS or AD. Should a goodness-of-fit test for daily data be performed and the p-values be passed back? NULL performs no test, KS performs a Kolmogorof-Smirnov test, and AD performs an Anderson-Darling test.}
\item{GoFtest}{If (non-null) a GoF test for daily data should be performed, which one (\code{"AD"}, \code{KS}, see \sQuote{Details})}
\item{...}{any other argument passed to the sub-function specifying the cdf for fitting and drawing. See \sQuote{Details} and \sQuote{Examples}. }
}
......@@ -65,20 +68,33 @@ out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE)
# warnings() # as a follow-up to `suppWarn=TRUE`
## Specifying particular CDF:
## example with generalized Beta distribution of the second kind
require( "GB2")
rCDF <- function(n, theta){
CDF <- "gb2"
r_gb2 <- function(n, theta){
rgb2(n, theta[1], theta[2], theta[3], theta[4])
}
pCDF <- function(x, theta){
p_gb2 <- function(x, theta){
pgb2(x, theta[1], theta[2], theta[3], theta[4])
}
CDF.fit <- function( xdat, ...) {
gb2_fit <- function( xdat, ...) {
mlfit.gb2( xdat, ...)[[2]]$par
}
out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
marginal="CDF.fit", draw="rCDF")
## example with the Generalized Extreme Value (GEV) distribution
### test with GEV distribution
CDF <- "gev"
r_gev <- function(n, theta){
rgev(n, theta[1], theta[2], theta[3])
}
p_gev <- function(x, theta){
pgev(x, theta[1], theta[2], theta[3])
}
gev_fit <- function( xdat, ...) {
gev.fit( xdat, ...)$mle
}
}
\keyword{ts}