Commit 5b9ae716 authored by Manuela Brunner's avatar Manuela Brunner

Implementation of two examples GEV and GB2

parent 1ee6021e
......@@ -25,6 +25,6 @@ BugReports: https://git.math.uzh.ch/reinhard.furrer/PRSim-devel
License: GPL-3
Encoding: UTF-8
LazyData: true
Depends: homtest, goftest
Depends: homtest, goftest, graphics, ismev, evd
Suggests: lattice, GB2
Imports: stats
importFrom("homtest", "Lmoments", "rand.kappa", "par.kappa", "F.kappa")
importFrom("stats", "rnorm", "runif", "fft", "ks.test")
<<<<<<< HEAD
importFrom("graphics", "hist")
=======
importFrom("goftest", "ad.test")
>>>>>>> f0241bb562c45952cb5f9f249dbd9b2585c469b0
importFrom("ismev","gev.fit")
importFrom("evd","rgev","pgev")
importFrom("GB2","mlfit.gb2","rgb2","pgb2")
export("prsim")
prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal=c("kappa","empirical",CDF),n_par=4,
marginal=c("kappa","empirical","gev"),n_par=4,
verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=c(NULL, "AD", "KS"), ...){
### Reinhard: Do we somehow need to include "others" under marginal. Or is this fine?
ifft <- function (x) fft(x, inverse = TRUE)/length(x)
......@@ -9,11 +10,12 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
marginal <- tolower(marginal)[1]
if (!(marginal %in% c("kappa","empirical"))) { # check if distributions exist
if (!is.character(marginal)) stop("'marginal' should be a character string.")
rCDF <- get(paste0("r",marginal), mode = "function")
pCDF <- get(paste0("p",marginal), mode = "function")
CDF.fit <- get(paste0(marginal,".fit"), mode = "function")
}
r_CDF <- get(paste0("r_",marginal), mode = "function")
p_CDF <- get(paste0("p_",marginal), mode = "function")
CDF_fit <- get(paste0(marginal,"_fit"), mode = "function")
### I had to change the function names to not yet existing function names. Otherwise, the functions get stuck in recursive loops.
### functions themselves are defines outside the prsim function because they have diff
}
if (!is.null(GoFtest)) {
GoFtest <- toupper(GoFtest)[1]
......@@ -44,9 +46,6 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
names(data) <- c("YYYY","MM","DD","Qobs")
data$timestamp <- as.POSIXct(strptime(tmp, format="%Y %m %d", tz="GMT"))
}
### test whether r_CDF, p_CDF, and CDF_fit are properly defined.
### @Reinhard: could you please implement these tests?
### remove February 29
data <- data[format(data$timestamp, "%m %d") != "02 29",]
......@@ -169,7 +168,7 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
# which(unlist(p_val_list)<0.05) ### non-rejected for most days, difficulty with winter months (relation to hp production?)
### use either a predefined distribution in R or define own function
if(marginal!=c("kappa","empirical")){
if(marginal!="kappa" & marginal!="empirical"){
p_vals <- numeric(365)
par_day <- matrix(0, nrow=365, ncol=n_par)
for(d in c(1:365)){
......@@ -187,12 +186,12 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
# fitdistr(x=data_window,dgengamma.stacy,start=list("scale"=1,"k"=6,"d"=2)) ### package VGAM: fitting does not work
### use function from package felxsurv
# theta <- CDF.fit(xdat=data_window)
theta <- match.fun(paste(CDF,"_fit",sep=""))(xdat=data_window)
theta <- CDF_fit(xdat=data_window)
# do.call(paste(CDF,".fit",sep=""),list(xdat=data_window))
### goodness of fit test
# data_random <- rCDF(n=length(data_window),theta=theta)
data_random <- match.fun(paste("r_",CDF,sep=""))(n=length(data_window),theta=theta)
data_random <- r_CDF(n=length(data_window),theta=theta)
# density_gengam[[d]] <- density(data_gengam)
hist(data_window)
......@@ -201,7 +200,7 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
p_vals[d] <- ks.test(data_window,data_random)$p.value
}
if (tolower(GoFtest)=="ad"){
p_vals[d] <- ad.test(data_window,match.fun(paste("p_",CDF,sep="")),theta)$p.value
p_vals[d] <- ad.test(data_window,p_CDF,theta)$p.value
}
### store parameters
par_day[d,] <- theta
......@@ -355,10 +354,10 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
}
### use any predefined distribution for backtransformation
if(marginal!=c("kappa","empirical")){
if(marginal!="kappa" & marginal!="empirical"){
### use monthly distribution for backtransformation
### simulate random sample of size n from disribution
data_day$cdf <- match.fun(paste("r_",CDF,sep=""))(n=length(data_day$Qobs),par_day[d,])
data_day$cdf <- r_CDF(n=length(data_day$Qobs),par_day[d,])
data_day$rank <- rank(data_day$cdf)
data_new$rank <- rank(data_new$seasonal)
......@@ -397,10 +396,10 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15,
return(list( simulation=data_stoch, pars=par_day, p_val=p_vals))
p_vals <- NULL
}
}
if(marginal != "empirical"){
if (marginalpar) { # also return intermediate results
return(list( simulation=data_stoch, pars=kap_par_day, p_val=p_vals))
return(list( simulation=data_stoch, pars=par_day, p_val=p_vals))
} else {
return(list( simulation=data_stoch, pars=NULL, p_val=p_vals))
}
......
......@@ -26,12 +26,10 @@ gb2_fit <- function( xdat, ...) {
mlfit.gb2( xdat, ...)[[2]]$par
}
### @ Reinhard: This is not very elegant yet. CDF needs to be specified before giving it to the function.
### I guess this can be alternatively resolved by using a different argument for marginal. Any ideas?
CDF <-"gev"
out <- prsim(data=runoff, number_sim=1, marginal=CDF,GoFtest = "KS",n_par=3)
CDF <-"gb2"
### GEV
out <- prsim(data=runoff, number_sim=1, marginal="gev",GoFtest = "KS",n_par=3)
### Generalized Beta of the second kind
out <- prsim(data=runoff, number_sim=1, marginal="gb2",GoFtest = "KS",n_par=4)
sim <- out$simulation
......
......@@ -6,7 +6,6 @@
Applies the algorithm to a single station
}
\usage{
<<<<<<< HEAD
prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,marginal="kappa",n_par=4,
verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=FALSE, ...)
}
......@@ -14,15 +13,13 @@ prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,marginal="kappa",n_
\item{data}{data frame containing the time indications and runoff of at least one station. See \sQuote{Details}.}
\item{station_id}{identifies the station in case several runoffs are present in \code{data}. See \sQuote{Details}.}
\item{number_sim}{number of simulations to be carried out.}
\item{marginal}{marginal distribution to be used for the backtransformation. Can be either "kappa", "empirical", or CDF. "kappa" uses the four-parameter kappa distribution for backtransformation, "empirical" uses the empirical distribution. CDF allows for specifying any distribution \sQuote{Examples}.}
\item{marginal}{marginal distribution to be used for the backtransformation. Can be either "kappa", "empirical", or any type of CDF (see \sQuote{Details}). "kappa" uses the four-parameter kappa distribution for backtransformation, "empirical" uses the empirical distribution. CDF allows for specifying any distribution \sQuote{Examples}.}
\item{n_par}{number of parameters of the marginal distribution used}
\item{win_h_length}{(half-)length of moving window size.}
\item{marginal}{marginal distribution to be used for the backtransformation, see \sQuote{Details}.}
\item{verbose}{logical. Should progress be reported?}
\item{marginalpar}{logical. Should the estimated parameters of the distribution used be returned?}
\item{suppWarn}{logical. See \sQuote{Details}.}
\item{GoFtest}{NULL, KS or AD. Should a goodness-of-fit test for daily data be performed and the p-values be passed back? NULL performs no test, KS performs a Kolmogorof-Smirnov test, and AD performs an Anderson-Darling test.}
\item{GoFtest}{If (non-null) a GoF test for daily data should be performed, which one (\code{"AD"}, \code{KS}, see \sQuote{Details})}
\item{GoFtest}{NULL, KS or AD. Should a goodness-of-fit test for daily data be performed and the p-values be passed back? NULL performs no test, KS performs a Kolmogorof-Smirnov test, and AD performs an Anderson-Darling test. If (non-null) a GoF test for daily data should be performed, which one (\code{"AD"}, \code{KS}, see \sQuote{Details})}
\item{...}{any other argument passed to the sub-function specifying the cdf for fitting and drawing. See \sQuote{Details} and \sQuote{Examples}. }
}
\details{
......@@ -34,18 +31,16 @@ Stations are identified by column name (default \code{"Qobs"}), or by column ind
The function \code{homtest::par.kappa} might issue quite a few warnings of type \code{In fn(par, ...) : value out of range in 'gammafn'}. The argument \code{suppWarn} allows to silence warnings for the specific function call via \code{suppressWarnings()}. Of course, a subsequent check via \code{warnings()} is recommended.
Alternative distributions can be specified by providing three functions: (1) a function fitting the parameters of a distributions and providing a vector of these parameters as output (CDF_fit), (2) a function simulating random numbers from this distribution (r_CDF), and (3) a function specifying the distribution (p_CDF). \sQuote{Examples} for the generalized beta for the second kind and for the Generalized Extreme Values (GEV) distribution.
To specify alternatives distr ... we need a function iwth input giving output and a funciton ... see \sQuote{Examples} for generalized beta for the second kind.
Add remark about AD test!
When using the kappa distribution, the AD test can for certain values of the parameter h not be performed.
}
\value{A list with elements
\item{simulation }{A data frame with time information, observations, deseaonalized observations and
\code{number_sim} columns containing the simulated runoff.}
\item{pars}{A matrix containing the estimated parameters of the marginal distribution (if \code{marginalpar}).}
\item{p_val}{A vector containing the p-values of \code{ks.test} applied to the daily detrended data (if \code{KStest}).}
\item{p_val}{A vector containing the p-values of \code{ks.test} or \code{ad.test} applied to the daily detrended data (if \code{GoFtest} is not NULL)}
}
\references{
Brunner, Bardossy, Furrer (2019) Technical note: Stochastic simulation of streamflow time series using phase randomization. under review. https://www.hydrol-earth-syst-sci-discuss.net/hess-2019-142/.
......@@ -81,7 +76,7 @@ gb2_fit <- function( xdat, ...) {
mlfit.gb2( xdat, ...)[[2]]$par
}
out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
marginal="CDF.fit", draw="rCDF")
marginal="gb2")
## example with the Generalized Extreme Value (GEV) distribution
### test with GEV distribution
......@@ -95,6 +90,9 @@ out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
gev_fit <- function( xdat, ...) {
gev.fit( xdat, ...)$mle
}
out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE,
marginal="gev",n_par=3)
}
\keyword{ts}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment