Commit 2e2369f6 authored by Florian Gerber's avatar Florian Gerber
Browse files

v 0.5

parents
^.*\.Rproj$
^\.Rproj\.user$
^\.git/*$
^.gitignore$
^.*\.tar\.gz$
\ No newline at end of file
Package: optimParallel
Type: Package
Title: Parallel Optim
Version: 0.5
Date: 2018-04-03
Author: Florian Gerber
Maintainer: Florian Gerber <florian.gerber@math.uzh.ch>
Description: provides a parallel version of the gradient based
stats::optim() methods "L-BFGS-B", "BFGS", and "CG". This can lead
to a significantly performance boost of stats::optim().
License: GPL (>= 2)
URL: https://git.math.uzh.ch/florian.gerber/optimParallel
BugReports: https://git.math.uzh.ch/florian.gerber/optimParallel
Depends: R (>= 3.1), stats, parallel
Suggests: roxygen2, spam, testthat
RoxygenNote: 6.0.1
# Generated by roxygen2: do not edit by hand
export(optimParallel)
importFrom(parallel,clusterExport)
importFrom(stats,optim)
do.call
parallel_fg_generator <- function(fn, gr=NULL, args_list=NULL,
one_sided=FALSE, lower=-Inf, upper=Inf,
lapply_variant = getOption("optimParallel.lapply"),
ndeps = 1e-3, fnscale=NULL, parscale=NULL){
stopifnot(is.function(fn))
if(is.null(fnscale)) fnscale <- 1
if(is.null(parscale)) parscale <- 1
if(any(is.na(lower))) lower[is.na(lower)] <- -Inf
if(any(is.na(upper))) lower[is.na(upper)] <- Inf
fun <- function(par, ...){ fn(par, ...)/fnscale }
if(!is.null(gr))
gra <- function(par, ...){ fn(par, ...)/fnscale }
eval <- function(par){
## the first argument of fun has to be a vector of length length(par)
if(identical(par, par_last))
return(list(value=value, grad=grad))
par_last <<- par
if(is.null(gr)){
n <- length(par)
ndeps <- ndeps*parscale
ndeps_mat <- array(0, c(n,n))
ndeps_vec <- rep(ndeps, length.out=n)
diag(ndeps_mat) <- ndeps_vec
if(one_sided){
ndepsused <- ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PARl <- data.frame(cbind(array(par, c(n,n))-ndeps_mat))
PAR[hitu] <- PARl[hitu]
ndepsused[hitu] <- -ndeps_vec[hitu]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list))
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list)
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
grad <<- (e-value)/ndepsused
} else { # two sided
ndepsused <- 2*ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat, array(par, c(n,n))-ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PAR[hitu] <- par
hitui <- apply(matrix(hitu, ncol=2), 1, any)
ndepsused[hitui] <- ndeps_vec[hitui]
}
}
if(!(is.null(lower) || all(is.na(lower)) || all(lower==Inf))){
hitl <- unlist(lapply(PAR, function(par){any(par<lower)}))
if(any(hitl)){
PAR[hitl] <- par
hitli <- apply(matrix(hitl, ncol=2), 1, any)
ndepsused[hitli] <- ndeps_vec[hitli]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list))
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list)
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
e_mat <- matrix(e, ncol=2)
grad <<- c(e_mat[,1]-e_mat[,2])/ndepsused
}
}else{ # gr is not null
args <- c(par=par, args_list)
expr <- list(expression(do.call("fun", args=args)),
expression(do.call("gra", args=args)))
args2 <- list(expr, eval, envir=parent.frame(2))
res <- do.call("lapply", args2)
value <<- res[[1]]
grad <<- res[[2]]
}
i_e <<- i_e+1
return(list(value=value, grad=grad))
}
f <- function(par){
eval(par)
i_f <<- i_f+1
return(value)
}
g <- function(par){
eval(par)
i_g <<- i_g+1
return(grad)
}
init <- function(){
i_f <<- i_g <<- i_e <<- 0
par_last <<- value <<- grad <<- NA
}
countes <- function(){
c(i_e, i_f, i_g)
}
i_f <- i_g <- i_e <- 0
par_last <- value <- grad <- NA
list(f=f, g=g, init=init, eval=eval, countes=countes)
}
parallel_fg_generator <- function(fn, gr=NULL, args_list=NULL,
one_sided=FALSE, lower=-Inf, upper=Inf,
lapply_variant = getOption("optimParallel.lapply"),
ndeps = 1e-3, fnscale=NULL, parscale=NULL){
stopifnot(is.function(fn))
if(is.null(fnscale)) fnscale <- 1
if(is.null(parscale)) parscale <- 1
if(any(is.na(lower))) lower[is.na(lower)] <- -Inf
if(any(is.na(upper))) lower[is.na(upper)] <- Inf
fun <- function(par, ...){ fn(par, ...)/fnscale }
if(!is.null(gr))
gra <- function(par, ...){ fn(par, ...)/fnscale }
eval <- function(par){
## the first argument of fun has to be a vector of length length(par)
if(identical(par, par_last))
return(list(value=value, grad=grad))
par_last <<- par
if(is.null(gr)){
n <- length(par)
ndeps <- ndeps*parscale
ndeps_mat <- array(0, c(n,n))
ndeps_vec <- rep(ndeps, length.out=n)
diag(ndeps_mat) <- ndeps_vec
if(one_sided){
ndepsused <- ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PARl <- data.frame(cbind(array(par, c(n,n))-ndeps_mat))
PAR[hitu] <- PARl[hitu]
ndepsused[hitu] <- -ndeps_vec[hitu]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list))
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list)
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
grad <<- (e-value)/ndepsused
} else { # two sided
ndepsused <- 2*ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat, array(par, c(n,n))-ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PAR[hitu] <- par
hitui <- apply(matrix(hitu, ncol=2), 1, any)
ndepsused[hitui] <- ndeps_vec[hitui]
}
}
if(!(is.null(lower) || all(is.na(lower)) || all(lower==Inf))){
hitl <- unlist(lapply(PAR, function(par){any(par<lower)}))
if(any(hitl)){
PAR[hitl] <- par
hitli <- apply(matrix(hitl, ncol=2), 1, any)
ndepsused[hitli] <- ndeps_vec[hitli]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list))
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list)
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
e_mat <- matrix(e, ncol=2)
grad <<- c(e_mat[,1]-e_mat[,2])/ndepsused
}
}else{ # gr is not null
args <- c(par=par, args_list)
expr <- list(expression(do.call("fun", args=args)),
expression(do.call("gra", args=args)))
args2 <- list(expr, "eval", envir=parent.frame(2))
res <- do.call("lapply", args2)
value <<- res[[1]]
grad <<- res[[2]]
}
i_e <<- i_e+1
return(list(value=value, grad=grad))
}
f <- function(par){
eval(par)
i_f <<- i_f+1
return(value)
}
g <- function(par){
eval(par)
i_g <<- i_g+1
return(grad)
}
init <- function(){
i_f <<- i_g <<- i_e <<- 0
par_last <<- value <<- grad <<- NA
}
countes <- function(){
c(i_e, i_f, i_g)
}
i_f <- i_g <- i_e <- 0
par_last <- value <- grad <- NA
list(f=f, g=g, init=init, eval=eval, countes=countes)
}
fun <- sum
gra <- prod
par=c(1,2)
args_list=NULL
args2
str(args2)
eval
expr
lapply(expr, eval )
lapply(expr, "eval")
args
args
args
lapply_variant="lapply"
vaue
value
#' @name optimParallel-package
#' @aliases optimParallel-Package optimParallel-package OptimParallel-Package
#' @author Florian Gerber, \email{florian.gerber@@math.uzh.ch}.
#' @title Overview
#' @description
#' The function provides a parallel version of the gradient based \code{\link{optim}} methods
#' "L-BFGS-B", "BFGS", and "CG".
#' If the evaluation of the target function takes more that 0.5 seconds, \code{\link{optimParallel}} can reduce the optimization time significantly.
#' For "L-BFGS-B" the speed increase is between factor 2 in the case where \code{gr} is specified and
#' factor 2p+1 (p = number of parameters) in the case where \code{gr} is not specified.
#' @seealso \code{\link{optimParallel}}
#' @docType package
#' @name gapfill-package
#' @keywords package
NULL
#' @name optimParallel
#' @aliases optim parallel
#' @author Florian Gerber, \email{florian.gerber@@math.uzh.ch}.
#' @title parallel version of \code{\link{optim}}
#' @description
#' The function provides a parallel version of the gradient based \code{\link{optim}} methods
#' "L-BFGS-B", "BFGS", and "CG".
#' If the evaluation of the target function takes more that 0.5 seconds, \code{\link{optimParallel}} can reduce the optimization time significantly.
#' For "L-BFGS-B" the speed increase is between factor 2 in the case where \code{gr} is specified and
#' factor 2p+1 (p = number of parameters) in the case where \code{gr} is not specified.
#' @param par see the documentation of \code{\link{optim}}.
#' @param fn see the documentation of \code{\link{optim}}.
#' @param gr see the documentation of \code{\link{optim}}.
#' @param ... see the documentation of \code{\link{optim}}.
#' @param method only the gradient based methods "L-BFGS-B", "BFGS", and "CG" are available.
#' See the documentation of \code{\link{optim}}.
#' @param lower see the documentation of \code{\link{optim}}.
#' @param upper see the documentation of \code{\link{optim}}.
#' @param control see the documentation of \code{\link{optim}}.
#' @param hessian see the documentation of \code{\link{optim}}.
#' @param parallel_args is a list control parameters for the parallel execution.
#' \itemize{
#' \item{\code{lapply}}{charcter vector of length 1. The default is "mclapply" (not Windows) and "parLapply" (Windows)}
#' \item{\code{one_sided}}{logical vector of length 1. If \code{TRUE} (default) two-sided derivatives are calculated.}
#' \item{\code{cl}}{object of class 'cluster'.}
#' }
#'
#' @return Same as \code{\link{optim}}. See the documentation thereof.
#'
#' @details
#' The R package parallel in combination with a reference class like construct is used
#' to evaluate the target function \code{fn} and the gradient \code{gr} in parallel.
#'
#' @note Currently only the method "L-BFGS-B" is tested with unit tests.
#'
#' @seealso \code{\link{optim}}
#' @examples
#' \dontrun{
#' neg2ll <- function(par, sleep=0, verbose=FALSE){
#' if(verbose)
#' cat(par, "\n")
#' Sys.sleep(sleep)
#' -sum(dnorm(x=x, mean=par[1], sd=par[2], log=TRUE))
#' }
#' set.seed(13); x <- rnorm(1000, 5, 2)
#'
#' ## works only on Linux like platforms:
#' ## -----------------------------------
#' detectCores() # available cores
#' options(mc.cores=detectCores()) # set number of cores
#' options(optimParallel.lapply="mclapply") # used by default
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' method = "L-BFGS-B", lower=c(-Inf, .0001))
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' verbose=TRUE, sleep=.5, # args to neg2ll()
#' method="L-BFGS-B", lower=c(-Inf, .0001),
#' control=list(factr=.01/.Machine$double.eps))
#' ## each step invokes 5 parallel calls to neg2ll()
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' verbose=TRUE, sleep=.5,
#' method ="L-BFGS-B", lower=c(-Inf, .0001),
#' control=list(factr=.01/.Machine$double.eps),
#' parallel_args=list(one_sided=TRUE))
#' ## each step invokes 3 parallel calls to neg2ll()
#'
#'
#' ## For Linux and Windows platforms:
#' ## --------------------------------
#' options(optimParallel.lapply="parLapply")
#' cl <- makeCluster(detectCores())
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' x=x, # it is required to pass all data used by fn
#' method = "L-BFGS-B", lower=c(-Inf, .0001),
#' parallel_args=list(cl=cl))
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' x=x, verbose=TRUE, sleep=.5,
#' method="L-BFGS-B", lower=c(-Inf, .0001),
#' control=list(factr=.01/.Machine$double.eps),
#' parallel_args=list(cl=cl))
#' ## note: printing to screen does not work with clusters
#'
#' optimParallel(par=c(1,1), fn=neg2ll,
#' x=x, verbose=TRUE, sleep=.5,
#' method ="L-BFGS-B", lower=c(-Inf, .0001),
#' control=list(factr=.01/.Machine$double.eps),
#' parallel_args=list(cl=cl, one_sided=TRUE))
#'
#' stopCluster(cl)
#'
#' ## to perform one sided derivatives by default set:
#' #options(optimParallel.one_sided=TRUE)
#' }
#' @export
#' @importFrom stats optim
#' @importFrom parallel clusterExport
optimParallel <- function(par, fn, gr = NULL, ..., method = c("BFGS", "L-BFGS-B", "CG"),
lower = -Inf, upper = Inf, control = list(), hessian = FALSE,
parallel_args=list()){
dots <- list(...)
if(!(method %in% c("BFGS", "L-BFGS-B", "CG")))
stop("Only the gradient methods \"BFGS\", \"L-BFGS-B\", \"CG\" are available.")
if(is.null(parallel_args$lapply))
parallel_args$lapply <- getOption("optimParallel.lapply")
if(is.null(parallel_args$one_sided))
parallel_args$one_sided <- getOption("optimParallel.one_sided")
if(is.null(control$ndeps))
control$ndeps <- 1e-3
if(is.null(control$fnscale)){
fnscale <- 1
} else {
fnscale <- control$fnscale
control$fnscale <- NULL
}
parscale <- control$parscale
fg <- parallel_fg_generator(fn=fn, gr=gr, args_list=dots, one_sided=parallel_args$one_sided,
lower=lower, upper=upper,
lapply_variant = parallel_args$lapply, cl=parallel_args$cl, ndeps=control$ndeps,
fnscale=fnscale, parscale=parscale)
out <- stats::optim(par=par, fn=fg$f, gr=fg$g, method = method, lower=lower,
upper=upper, control=control, hessian=hessian)
out$value <- out$value*fnscale
out
}
parallel_fg_generator <- function(fn, gr=NULL, args_list=list(),
one_sided=FALSE, lower=-Inf, upper=Inf,
lapply_variant = getOption("optimParallel.lapply"),
cl=NULL, ndeps = 1e-3, fnscale=NULL, parscale=NULL){
stopifnot(is.function(fn),
is.null(gr) || is.function(gr),
is.list(args_list),
is.logical(one_sided), length(one_sided)==1,
is.numeric(lower), is.numeric(upper),
is.character(lapply_variant), length(lapply_variant)==1,
is.null(ndeps) || is.numeric(ndeps),
is.null(fnscale) || is.numeric(fnscale),
is.null(parscale) || is.numeric(parscale)
)
if(is.null(fnscale)) fnscale <- 1
if(is.null(parscale)) parscale <- 1
if(any(is.na(lower))) lower[is.na(lower)] <- -Inf
if(any(is.na(upper))) lower[is.na(upper)] <- Inf
if(lapply_variant == "parLapply"){
if(is.null(cl))
stop("specify cluster with no argument 'cl' given to 'parallel_args'.")
if(!is.null(args_list)){
env <- new.env()
for(i in seq_along(args_list))
assign(x=names(args_list)[i], args_list[[i]], envir=env)
parallel::clusterExport(cl=cl, varlist=names(args_list), envir=env)
}
}
fun <- function(par, ...){ fn(par, ...)/fnscale }
if(!is.null(gr))
gra <- function(par, ...){ gr(par, ...)/fnscale }
eval <- function(par){
## the first argument of fun has to be a vector of length length(par)
if(identical(par, par_last))
return(list(value=value, grad=grad))
par_last <<- par
if(is.null(gr)){
n <- length(par)
ndeps <- ndeps*parscale
ndeps_mat <- array(0, c(n,n))
ndeps_vec <- rep(ndeps, length.out=n)
diag(ndeps_mat) <- ndeps_vec
if(one_sided){
ndepsused <- ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PARl <- data.frame(cbind(array(par, c(n,n))-ndeps_mat))
PAR[hitu] <- PARl[hitu]
ndepsused[hitu] <- -ndeps_vec[hitu]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list))
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list[names(args_list) %in% names(formals(fn))])
if(lapply_variant == "parLapply"){
args <- c(list(cl=cl), args)
names(args)[names(args)=="FUN"] <- "fun"
}
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
grad <<- (e-value)/ndepsused
} else { # two sided
ndepsused <- 2*ndeps_vec
PAR <- data.frame(cbind(array(par, c(n,n))+ndeps_mat, array(par, c(n,n))-ndeps_mat))
if(!(is.null(upper) || all(is.na(upper)) || all(upper==Inf))){
hitu <- unlist(lapply(PAR, function(par){any(par>upper)}))
if(any(hitu)){
PAR[hitu] <- par
hitui <- apply(matrix(hitu, ncol=2), 1, any)
ndepsused[hitui] <- ndeps_vec[hitui]
}
}
if(!(is.null(lower) || all(is.na(lower)) || all(lower==Inf))){
hitl <- unlist(lapply(PAR, function(par){any(par<lower)}))
if(any(hitl)){
PAR[hitl] <- par
hitli <- apply(matrix(hitl, ncol=2), 1, any)
ndepsused[hitli] <- ndeps_vec[hitli]
}
}
PAR <- cbind(PAR, par)
if(is.null(args_list) || length(args_list)==0)
args <- list(X=PAR, FUN=fun)
else
args <- c(list(X=PAR, FUN=fun), args_list[names(args_list) %in% names(formals(fn))])
if(lapply_variant == "parLapply"){
args <- c(list(cl=cl), args)
names(args)[names(args)=="FUN"] <- "fun"
}
e <- unname(unlist(do.call(lapply_variant, args)))
value <<- e[length(e)]
length(e) <- length(e)-1
e_mat <- matrix(e, ncol=2)
grad <<- c(e_mat[,1]-e_mat[,2])/ndepsused
}
}else{ # gr is not null
if(is.null(args_list) || length(args_list)==0)
args_fun <- args_gra <- list(par=par)
else {
args_fun <- c(par=par, args_list[names(args_list) %in% names(formals(fn))])
args_gra <- c(par=par, args_list[names(args_list) %in% names(formals(gr))])
}
ll <- list(function(){ do.call("fun", args_fun)},
function(){ do.call("gra", args_gra)})
args <- list(X=ll, FUN=function(f) f())
if(lapply_variant == "parLapply"){
args <- c(list(cl=cl), args)
names(args)[names(args)=="FUN"] <- "fun"
}
res <- do.call(lapply_variant, args)
value <<- res[[1]]
grad <<- res[[2]]
}
i_e <<- i_e+1
return(list(value=value, grad=grad))
}
f <- function(par){
eval(par)
i_f <<- i_f+1
return(value)
}
g <- function(par){
eval(par)
i_g <<- i_g+1
return(grad)
}
init <- function(){
i_f <<- i_g <<- i_e <<- 0
par_last <<- value <<- grad <<- NA
}
countes <- function(){
c(i_e, i_f, i_g)
}
i_f <- i_g <- i_e <- 0
par_last <- value <- grad <- NA
list(f=f, g=g, init=init, eval=eval, countes=countes)
}
.onLoad <- function(libname, pkgname)
{
if(is.null(options()$optimParallel.lapply)){
if(.Platform$OS.type == "windows")
options(optimParallel.lapply = "parLapply")
else
options(optimParallel.lapply = "mclapply")
}
if(is.null(options()$optimParallel.one_sided))
options(optimParallel.one_sided = FALSE)
}
The R package optimParallel
===========================
The package provides a parallel version of the gradient based