Commit 22e144bb authored by Gilles Kratzer

W1 N2 + source

parent 5106081c
Pipeline #1553 passed in 1 second
^docs$
^_pkgdown\.yml$
^.*\.Rproj$
^\.Rproj\.user$
^README.Rmd
^\.gitlab-ci\.yml$
^NEWS.md
Package: mcmcabn
Title: Flexible implementation of a structural MCMC sampler for DAGs
Title: Flexible Implementation of a Structural MCMC Sampler for DAGs
Version: 0.1
Authors@R: c(person("Gilles", "Kratzer", role = c("aut", "cre"),
email = "gilles.kratzer@math.uzh.ch",
......@@ -19,3 +19,4 @@ Imports: gRbase, abn, coda, ggplot2, ggpubr, cowplot
Suggests: bnlearn, knitr, rmarkdown
RoxygenNote: 6.1.1
VignetteBuilder: knitr
URL: https://www.math.uzh.ch/pages/mcmcabn/
......@@ -8,7 +8,7 @@ plot.mcmcabn
importFrom(ggplot2,ggplot)
importFrom(coda,effectiveSize)
import(abn)
importFrom(ggplot2,aes,geom_line,geom_hline,geom_text,geom_point,labs,scale_color_manual,geom_density,scale_fill_manual,coord_flip,scale_colour_manual)
importFrom(ggplot2,aes,geom_line,geom_hline,geom_text,geom_point,labs,scale_color_manual,geom_density,scale_fill_manual,coord_flip,scale_colour_manual, aes_string)
importFrom(ggpubr, theme_pubr)
importFrom(cowplot, ggdraw, axis_canvas, insert_yaxis_grob)
importFrom("graphics", "plot")
......
......@@ -8,22 +8,25 @@
plot.mcmcabn <- function(x,
max.score = FALSE,
...){
#utils::globalVariables(c("X","method" ,"scores"))
#utils::globalVariables(c(".", "%>%"))
dta <- data.frame(x[-1])
dta$X <- 1:length(x$scores)
max.score. <- max(x$scores)
if(!max.score){
original_plot <- ggplot(data = dta, aes(x = X,y = scores)) +
original_plot <- ggplot(data = dta, aes_string(x = "X",y = "scores")) +
geom_line(alpha = 0.5) +
geom_hline(yintercept = max.score., linetype="dashed", color = "red", alpha = 0.8) +
geom_text(aes(0,max.score.,label = round(max.score., digits = 2), vjust = -.5),color="red")+
geom_point(data = dta[dta$method %in% c("REV", "MBR"),],aes(color = factor(method)))+
geom_point(data = dta[dta$method %in% c("REV", "MBR"),],aes_string(color = "factor(method)"))+
#geom_point(aes(color=as.factor(method)))+
labs(x = "DAG index", y = "DAG scores",colour="Methods",title = "Trace plot") + theme_pubr() +
scale_color_manual(values = c("#F2C500", "#56B4E9"))
y_density <- axis_canvas(original_plot, axis = "y", coord_flip = TRUE) +
geom_density(data = dta, aes(x=scores,fill = factor(method)), color = NA, alpha = 0.5) +
geom_density(data = dta, aes_string(x="scores",fill = "factor(method)"), color = NA, alpha = 0.5) +
scale_fill_manual(values = c( "#F2C500","#37454B", "#56B4E9")) +
coord_flip()
......@@ -39,11 +42,11 @@ plot.mcmcabn <- function(x,
dta$cummax[i] <- dta$cummax[i-1]
}
}
p<-ggplot(data = dta,aes(x = X,y = cummax))+
p<-ggplot(data = dta,aes_string(x = "X",y = "cummax"))+
geom_line() +
geom_hline(yintercept = max.score., linetype="dashed", color = "red", alpha = 0.8) +
geom_text(aes(0,max.score.,label = round(max.score., digits = 2), vjust = -.5),color="red") +
geom_point(data = dta[dta$method %in% c("REV", "MBR"),],aes(color = factor(method)))+
geom_point(data = dta[dta$method %in% c("REV", "MBR"),],aes_string(color = "factor(method)"))+
theme_pubr() +
scale_colour_manual(values = c("#F2C500", "#56B4E9")) +
labs(x = "DAG index", y = "DAG scores",color="Methods",title = "Trace plot")
......
......@@ -25,7 +25,7 @@ summary.mcmcabn <- function(object, quantiles = c(0.025, 0.25, 0.5, 0.75, 0.975)
print(out2, ...)
cat("\n\nAcceptance rate: ", 1-mean(object$rejection),"\n",sep = "")
cat("Sample size adjusted for autocorrelation: ", unname(effectiveSize(mcmc.out$scores)),"\n",sep = "")
cat("Sample size adjusted for autocorrelation: ", unname(effectiveSize(object$scores)),"\n",sep = "")
}#EOF
......@@ -70,8 +70,9 @@ Werhli, A. V., & Husmeier, D. (2007). "Reconstructing gene regulatory networks w
Scutari, M. (2010). Learning Bayesian Networks with the bnlearn R Package. Journal of Statistical Software, 35(3), 1 - 22. doi:http://dx.doi.org/10.18637/jss.v035.i03.
}
\donotrun{
\examples{
\dontrun{
## Example from the asia dataset from Lauritzen and Spiegelhalter (1988) provided by Scutari (2010)
data("mcmc_run_asia")
......
......@@ -7,13 +7,14 @@
\title{MCMC search from the synthetic asia dataset for use with mcmcabn library examples}
\description{10^5 MCMC runs with a burn-in of 1000 runs, from the asia synthetic dataset from Lauritzen and Spiegelhalter (1988) provided by Scutari (2010).
}
\usage{ex0.dag.data}
\format{A data frame, binary variables are factors. The relevant formulas are given below (note these do not give parameter estimates just the form of the relationships, e.g. logit()=1 means a logit link function and comprises of only an intercept term).
\describe{
\usage{data(mcmc_run_asia)}
\format{
The data contain an object of class mcmcabn and a cache of scores computed with the abn R package.
\item{bsc.compute}{cache of score with a maximum of two parents per node.}
\item{dist}{a named list giving the distribution for each node in the network}
\item{mcmc.out}{an object of class mcmcabn.}
\itemize{
\item \code{bsc.compute}: cache of scores with a maximum of two parents per node.
\item \code{dist}: a named list giving the distribution for each node in the network.
\item \code{mcmc.out}: an object of class mcmcabn.
}}
\examples{
......@@ -23,7 +24,14 @@ library(bnlearn) #for the dataset
library(abn) #for the cache of score function
#renaming columns of the dataset
colnames(asia) <- c("Asia","Smoking", "Tuberculosis", "LungCancer", "Bronchitis", "Either", "XRay", "Dyspnea")
colnames(asia) <- c("Asia",
"Smoking",
"Tuberculosis",
"LungCancer",
"Bronchitis",
"Either",
"XRay",
"Dyspnea")
#lets define the distribution list
dist <- list(Asia = "binomial",
......
......@@ -37,8 +37,9 @@ Claus O. Wilke (2019). cowplot: Streamlined Plot Theme and Plot Annotations for
Scutari, M. (2010). Learning Bayesian Networks with the bnlearn R Package. Journal of Statistical Software, 35(3), 1 - 22. doi:http://dx.doi.org/10.18637/jss.v035.i03.
}
\donotrun{
\examples{
\dontrun{
## Example from the asia dataset from Lauritzen and Spiegelhalter (1988) provided by Scutari (2010)
data("mcmc_run_asia")
......
......@@ -53,9 +53,16 @@ query(mcmcabn = mcmc.out)
## what is the probability of the LungCancer node being a child of the Smoking node?
query(mcmcabn = mcmc.out,formula = ~LungCancer|Smoking)
## what is the probability of Smoking node being parent of both LungCancer and Bronchitis node?
query(mcmcabn = mcmc.out,formula = ~ LungCancer|Smoking+Bronchitis|Smoking)
## what is the probability of previous statement when there is no arc from Smoking to Tuberculosis and from Bronchitis to XRay?
query(mcmcabn = mcmc.out,formula = ~LungCancer|Smoking+Bronchitis|Smoking-Tuberculosis|Smoking-XRay|Bronchitis)
## what is the probability of the Smoking node being a parent of
## both the LungCancer and Bronchitis nodes?
query(mcmcabn = mcmc.out,
formula = ~ LungCancer|Smoking+Bronchitis|Smoking)
## what is the probability of the previous statement
## when there is no arc from Smoking to Tuberculosis nor from Bronchitis to XRay?
query(mcmcabn = mcmc.out,
formula = ~LungCancer|Smoking +
Bronchitis|Smoking -
Tuberculosis|Smoking -
XRay|Bronchitis)
}
......@@ -210,8 +210,13 @@ Classically, structure MCMC is done using the algorithm described by Madigan and
Two structurally transparent workarounds have been proposed: the new edge reversal move (REV) and Markov blanket resampling (MBR). The former performs a reversal move by resampling the parent set. Indeed, the classical reversal move depends on the global configuration of the parents and children, and thus fails to propose MCMC jumps that produce valid but very different DAGs in a single move. It is known to be unbiased, but the assumption of ergodicity does not necessarily hold. The latter applies the same idea to the entire Markov blanket of a randomly chosen node.
We believe that having those three algorithm in a unique function with user adjustable relative frequency could only lead to better results. Indeed, those three algorithm work very differently, then it is assumed by the authors that they could be complementary
We believe that combining these three algorithms in a single function with user-adjustable relative frequencies can only lead to better results. Indeed, these three algorithms work very differently: MC^3 is very stable and samples the nearby region efficiently, whereas REV and MBR can produce large MCMC jumps; being so different, they are possibly complementary.
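As a rough illustration (an editor's sketch, not part of this commit), the relative frequencies of the three move types might be set when calling `mcmcabn`; the argument names `prob.rev` and `prob.mbr` and the exact call below are assumptions, and `bsc.compute` / `dist` refer to the score cache and distribution list built in the package examples:

```r
## Hypothetical call mixing the three samplers (argument names are assumed):
## REV and MBR moves are proposed with the given probabilities, and the
## remaining probability mass goes to classical MC^3 single-arc moves.
mcmc.out <- mcmcabn(score.cache = bsc.compute,  # cache of scores from abn
                    score = "mlik",
                    data.dists = dist,          # named list of node distributions
                    max.parents = 2,
                    mcmc.scheme = c(1000, 99, 1000),
                    seed = 42,
                    prob.rev = 0.03,            # share of REV moves (assumed name)
                    prob.mbr = 0.03,            # share of MBR moves (assumed name)
                    prior.choice = 2)
```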
### Prior
Three priors are implemented in the mcmcabn R package. The parameter `prior.choice` determines the prior used within each individual node for a given choice of parent combination. In Koivisto and Sood (2004), p. 554, a form of prior, called the Koivisto prior, is used which assumes that the prior probabilities for parent combinations comprising the same number of parents are all equal. Specifically, the prior probability for a parent set G with cardinality |G| is proportional to 1/[n-1 choose |G|], where there are n nodes in total. Note that this favours parent combinations with either very low or very high cardinality, which may not be appropriate. This prior is used when `prior.choice = 2`. When `prior.choice = 1`, an uninformative prior is used where parent combinations of all cardinalities are equally likely.
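For intuition, the weighting above can be computed directly; the helper below is an editor's illustration (the function `koivisto_weight` is not part of the package):

```r
## Unnormalised Koivisto prior weight of a parent set G in a network with n nodes:
## proportional to 1 / choose(n - 1, |G|).
koivisto_weight <- function(n.nodes, n.parents) {
  1 / choose(n.nodes - 1, n.parents)
}

## With n = 8 nodes (as in the asia example), cardinalities 0 and 7 get weight 1,
## while |G| = 3 or 4 get weight 1/35: very small and very large parent sets are favoured.
sapply(0:7, function(k) koivisto_weight(8, k))
```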
When `prior.choice = 3`, a user-defined prior is used, specified by `prior.dag`. It is given as an adjacency matrix (square, with the same dimension as the number of nodes) whose entries, ranging from zero to one, encode the user's prior belief. A hyperparameter defining the global user belief in the prior is given by `prior.lambda`. As
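A minimal sketch of such a user-defined prior matrix (an editor's illustration; the node names follow the asia example, and the row/column orientation of the matrix is an assumption):

```r
## Square matrix with one row/column per node; entries in [0, 1] encode the
## prior belief in individual arcs (orientation convention assumed here:
## rows are children, columns are parents).
node.names <- c("Asia", "Smoking", "Tuberculosis", "LungCancer",
                "Bronchitis", "Either", "XRay", "Dyspnea")
prior.dag <- matrix(0.5, nrow = 8, ncol = 8,
                    dimnames = list(node.names, node.names))
prior.dag["LungCancer", "Smoking"] <- 0.9  # strong belief in a Smoking -> LungCancer arc
```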
# Bibliography