Commit 160dc3fe by Manuela Brunner

Modification of fitting and backtransformation to allow for any type of distribution.
Problem with the argument CDF. It currently is a variable, which needs to be pre-specified. How can we just provide any character string e.g. "gev" instead of using CDF <- "gev"?
parent 6d468b45
 ... ... @@ -17,7 +17,7 @@ Description: Provides a simulation framework to simulate streamflow time series streamflow values and their temporal correlation as expressed by short- and long-range dependence. The approach is based on the randomization of the phases of the Fourier transform. We further use the flexible four-parameter Kappa distribution, which allows for the extrapolation to yet unobserved low and high flows. A detailed description of for the extrapolation to yet unobserved low and high flows. Alternatively, the empirical or any other distribution can be used. A detailed description of the simulation approach and an application example can be found in https://www.hydrol-earth-syst-sci-discuss.net/hess-2019-142/. URL: https://git.math.uzh.ch/reinhard.furrer/PRSim-devel ... ...
 importFrom("homtest", "Lmoments", "rand.kappa", "par.kappa") importFrom("stats", "rnorm", "runif", "fft", "ks.test") importFrom("graphics", "hist") export("prsim")
 prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, marginal=c("kappa","empirical","CDF.fit"), draw="rCDF", marginal=c("kappa","empirical",CDF),n_par=4, verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=c(NULL, "AD", "KS"), ...){ ifft <- function (x) fft(x, inverse = TRUE)/length(x) ... ... @@ -31,6 +31,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, data$timestamp <- as.POSIXct(strptime(tmp, format="%Y %m %d", tz="GMT")) } ### test whether r_CDF, p_CDF, and CDF_fit are properly defined. ### @Reinhard: could you please implement these tests? ### remove February 29 data <- data[format(data$timestamp, "%m %d") != "02 29",] ... ... @@ -81,8 +83,8 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, ### To enable a sufficient sample size by using daily values in moving window around day i (i.e., reduce uncertainty due to fitting) if(marginal=="kappa"){ p_vals <- numeric(365) kap_par_day <- matrix(0, nrow=365, ncol=4) density_kap <- list() par_day <- matrix(0, nrow=365, ncol=4) # density_kap <- list() ### define window length win_length <- c(1:win_h_length) for(d in c(1:365)){ ... ... @@ -105,7 +107,7 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, if(length(test)>1){ kap_par <- test kap_par_day[d,] <- unlist(kap_par) par_day[d,] <- unlist(kap_par) ### define vector of quantiles quant <- sort(data_window) thresh <- kap_par$xi + kap_par$alfa*(1 - kap_par$h^(-kap_par$k))/kap_par$k ... ... @@ -123,32 +125,73 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, if (tolower(GoFtest)=="ks") p_vals[d] <- ks.test(data_window, data_kap)$p.value ### kappa distribution not rejected at alpha=0.05 if (tolower(GoFtest)=="ad") p_vals[d] <- ad.test(data_window, F.kappa, xi=kap_par$xi,alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h )$p.value ### NOTE: TRY if(length(try(ad.test(data_window,F.kappa,xi=kap_par$xi,alfa=kap_par$alfa,k=kap_par$k,h=kap_par$h)))==1){ p_vals[d] <- NA }else{ p_vals[d] <- ad.test(data_window, F.kappa, xi=kap_par$xi, alfa=kap_par$alfa, k=kap_par$k, h=kap_par$h)$p.value } } else{ if(d==1){ p_vals[d] <- NA kap_par_day[d,] <- NA par_day[d,] <- NA }else{ p_vals[d] <- p_vals[d-1] kap_par_day[d,] <- kap_par_day[d-1,] par_day[d,] <- par_day[d-1,] } } } ###qu: check if subsequent NAs work properly. ### replace NA entries by values of subsequent day if(length(which(is.na(kap_par_day[,1])))>0){ indices <- rev(which(is.na(kap_par_day[,1]))) if(length(which(is.na(par_day[,1])))>0){ indices <- rev(which(is.na(par_day[,1]))) for(i in 1:length(indices)){ kap_par_day[indices[i],] <- kap_par_day[indices[i]+1,] par_day[indices[i],] <- par_day[indices[i]+1,] } } } # which(unlist(p_val_list)<0.05) ### non-rejected for most days, difficulty with winter months (relation to hp production?) ### use either a predefined distribution in R or define own function if(marginal!=c("kappa","empirical")){ p_vals <- numeric(365) par_day <- matrix(0, nrow=365, ncol=n_par) for(d in c(1:365)){ ### define window length win_length <- seq(1:15) ### define start and end of window before <- data$index[d+365-win_length] after <- data$index[d+365+win_length-1] ### define days within window ids <- c(before,after) ### determine values in window around day i data_window <- data$Qobs[which(data$index%in%ids)] ### fit generalized gamma distribution # fitdistr(x=data_window,dgengamma.stacy,start=list("scale"=1,"k"=6,"d"=2)) ### package VGAM: fitting does not work ### use function from package felxsurv # theta <- CDF.fit(xdat=data_window) theta <- match.fun(paste(CDF,"_fit",sep=""))(xdat=data_window) # do.call(paste(CDF,".fit",sep=""),list(xdat=data_window)) ### goodness of fit test # data_random <- rCDF(n=length(data_window),theta=theta) data_random <- match.fun(paste("r_",CDF,sep=""))(n=length(data_window),theta=theta) # density_gengam[[d]] <- density(data_gengam) hist(data_window) hist(data_random,add=T,col="red") if (tolower(GoFtest)=="ks"){ p_vals[d] <- ks.test(data_window,data_random)$p.value } if (tolower(GoFtest)=="ad"){ p_vals[d] <- ad.test(data_window,match.fun(paste("p_",CDF,sep="")),theta)$p.value } ### store parameters par_day[d,] <- theta } } ### Deseasonalization was not found to be necessary. Use normalized data directly. data$des <- data$norm ... ... @@ -249,13 +292,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, ### use kappa distribution for backtransformation if(marginal=="kappa"){ colnames(kap_par_day) <- names(kap_par) colnames(par_day) <- names(kap_par) ### use monthly Kappa distribution for backtransformation ### simulate random sample of size n from Kappa disribution data_day$kappa <- rand.kappa(length(data_day$Qobs), xi=kap_par_day[d,"xi"],alfa=kap_par_day[d,"alfa"], k=kap_par_day[d,"k"],h=kap_par_day[d,"h"]) xi=par_day[d,"xi"],alfa=par_day[d,"alfa"], k=par_day[d,"k"],h=par_day[d,"h"]) # if(marginal=="wakeby"){ # ### use Wakeby distribution for backtrasformation ... ... @@ -295,6 +338,25 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$Qobs[data_new$rank[which(data$index%in%c(d))]] # } } ### use any predefined distribution for backtransformation if(marginal!=c("kappa","empirical")){ ### use monthly distribution for backtransformation ### simulate random sample of size n from disribution data_day$cdf <- match.fun(paste("r_",CDF,sep=""))(n=length(data_day$Qobs),par_day[d,]) data_day$rank <- rank(data_day$cdf) data_new$rank <- rank(data_new$seasonal) # hist(data_day$Qobs) # hist(data_day$cdf,add=T,col="blue") # data_day$rank <- rank(data_day$cdf) data_new$rank[which(data$index%in%c(d))] <- rank(data_new[which(data$index%in%c(d)),]$seasonal) ### derive corresponding values from the kappa distribution ### identify value corresponding to rank in the kappa time series data_ordered <- data_day[order(data_day$rank),] data_new$simulated_seasonal[which(data_new$index%in%c(d))] <- data_ordered$cdf[data_new$rank[which(data$index%in%c(d))]] } } # end for loop data_sim[[r]] <- data_new$simulated_seasonal ... ... @@ -310,13 +372,13 @@ prsim <- function(data, station_id="Qobs", number_sim=1, win_h_length=15, deseaonalized=data$des, data_sim) if (!KStest) { if (is.null(GoFtest)) { p_vals <- NULL } if(marginal=="kappa"){ if (marginalpar) { # also return intermediate results return(list( simulation=data_stoch, pars=kap_par_day, p_val=p_vals)) return(list( simulation=data_stoch, pars=par_day, p_val=p_vals)) } else { return(list( simulation=data_stoch, pars=NULL, p_val=p_vals)) } ... ...  new argument marginal=c("kappa", "empirical", "gev.fit") drawmarginal="rgev" rCDF <- function(n, theta) { rnorm(n, theta[1], sqrt(theta[2])) } CDF.fit <- function(xdat,...) { gev.fit(xdat,..., show=FALSE)$mle} > data(portpirie) > gev.fttt(portpirie[,2]) \ No newline at end of file
 ... ... @@ -2,8 +2,40 @@ data(runoff) out <- prsim(data=runoff, number_sim=1, marginal="empirical") out <- prsim(data=runoff, number_sim=1, marginal="kappa") out <- prsim(data=runoff, number_sim=1, marginal="kappa",GoFtest = "KS") ### GEV distribution r_gev <- function(n, theta){ rgev(n, theta[1], theta[2], theta[3]) } p_gev <- function(x, theta){ pgev(x, theta[1], theta[2], theta[3]) } gev_fit <- function( xdat, ...) { gev.fit( xdat, ...)$mle } ### generalized Pareto distribution of the second kind r_gb2 <- function(n, theta){ rgb2(n, theta[1], theta[2], theta[3], theta[4]) } p_gb2 <- function(x, theta){ pgb2(x, theta[1], theta[2], theta[3], theta[4]) } gb2_fit <- function( xdat, ...) { mlfit.gb2( xdat, ...)[[2]]$par } ### @ Reinhard: This is not very elegant yet. CDF needs to be specified before giving it to the function. ### I guess this can be alternatively resolved by using a different argument for marginal. Any ideas? CDF <-"gev" out <- prsim(data=runoff, number_sim=1, marginal=CDF,GoFtest = "KS",n_par=3) CDF <-"gb2" out <- prsim(data=runoff, number_sim=1, marginal="gb2",GoFtest = "KS",n_par=4) sim <- out$simulation # p_val <- out$p_val par(mai=c(.9,.9,.1,.1)) plot(sim$timestamp[1:1000], sim$Qobs[1:1000], type="l", xlab="Time [d]", ylab=expression(paste("Discharge [m"^3,"/s]"))) ... ...
 ... ... @@ -6,19 +6,20 @@ Applies the algorithm to a single station } \usage{ prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,marginal="kappa", verbose=TRUE, kappapar=TRUE, suppWarn=FALSE, KStest=FALSE, ...) prsim(data, station_id="Qobs", number_sim=1, win_h_length=15,marginal="kappa",n_par=4, verbose=TRUE, marginalpar=TRUE, suppWarn=FALSE, GoFtest=FALSE, ...) } \arguments{ \item{data}{data frame containing the time indications and runoff of at least one station. See \sQuote{Details}.} \item{station_id}{identifies the station in case several runoffs are present in \code{data}. See \sQuote{Details}.} \item{win_h_length}{(half-)length of moving window size.} \item{number_sim}{number of simulations to be carried out.} \item{marginal}{marginal distribution to be used for the backtransformation.} \item{marginal}{marginal distribution to be used for the backtransformation. Can be either "kappa", "empirical", or CDF. "kappa" uses the four-parameter kappa distribution for backtransformation, "empirical" uses the empirical distribution. CDF allows for specifying any distribution \sQuote{Examples}.} \item{n_par}{number of parameters of the marginal distribution used} \item{verbose}{logical. Should progress be reported?} \item{marginalpar}{logical. Should the estimated parameters of the kappa distribution be returned?} \item{marginalpar}{logical. Should the estimated parameters of the distribution used be returned?} \item{suppWarn}{logical. See \sQuote{Details}.} \item{KStest}{logical. Should a \code{ks.test} for daily data be performed and the p-values be passed back?} \item{GoFtest}{NULL, KS or AD. Should a goodness-of-fit test for daily data be performed and the p-values be passed back? NULL performs no test, KS performs a Kolmogorof-Smirnov test, and AD performs an Anderson-Darling test.} \item{...}{any other argument passed to the sub-function specifying the cdf for fitting and drawing. See \sQuote{Details} and \sQuote{Examples}. } } \details{ ... ... @@ -61,20 +62,33 @@ out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE) # warnings() # as a follow-up to suppWarn=TRUE ## Specifying particular CDF: ## example with generalized Beta distribution of the second kind require( "GB2") rCDF <- function(n, theta){ CDF <- "gb2" r_gb2 <- function(n, theta){ rgb2(n, theta[1], theta[2], theta[3], theta[4]) } pCDF <- function(x, theta){ p_gb2 <- function(x, theta){ pgb2(x, theta[1], theta[2], theta[3], theta[4]) } CDF.fit <- function( xdat, ...) { gb2_fit <- function( xdat, ...) { mlfit.gb2( xdat, ...)[[2]]$par } out <- prsim( runoff[ runoff$YYYY<1980, ], "Qobs", 1, suppWarn=TRUE, marginal="CDF.fit", draw="rCDF") ## example with the Generalized Extreme Value (GEV) distribution ### test with GEV distribution CDF <- "gev" r_gev <- function(n, theta){ rgev(n, theta[1], theta[2], theta[3]) } p_gev <- function(x, theta){ pgev(x, theta[1], theta[2], theta[3]) } gev_fit <- function( xdat, ...) { gev.fit( xdat, ...)$mle } } \keyword{ts}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!