Graphical models and Bayesian networks

Zurich, November 3. + 4., 2016

Revised February 2017

Søren Højsgaard

Exercise (flu and fever)

library(gRbase)

yn<-c("y","n")
ssp<- list(F=yn, T=yn, H=yn)
p.f <- ar_new(~F, levels=ssp, values=c(.01, .99))
p.t_f <- ar_new(~T:F, levels=ssp, values=c(.99, .01, .001, .999))
p.h_t <- ar_new(~H:T, levels=ssp, values=c(.7, .3, .05, .95))

P(F, T, H)

joint <- ar_prod(p.f, p.t_f, p.h_t)
joint
## , , F = y
## 
##    T
## H         y       n
##   y 0.00693 5.0e-06
##   n 0.00297 9.5e-05
## 
## , , F = n
## 
##    T
## H          y      n
##   y 0.000693 0.0495
##   n 0.000297 0.9396

question 1

Brute force marginalization

ar_marg(joint, "F") # Hardly surprising
## F
##    y    n 
## 0.01 0.99
ar_marg(joint, "T")
## T
##      y      n 
## 0.0109 0.9891
ar_marg(joint, "H")
## H
##      y      n 
## 0.0571 0.9429

question 2

P(H|F=y) Option 1: Brute force:

ar_dist(joint, marg="H", cond=list(F="y"))
## H
##     y     n 
## 0.694 0.306

Option 2: Be smart (avoid joint distribution) first find P(T,F) and from that P(T)

p.tf <- ar_prod(p.f, p.t_f)
p.tf
##    F
## T        y       n
##   y 0.0099 0.00099
##   n 0.0001 0.98901

P(T)

p.t <- ar_marg(p.tf, "T")
p.t
## T
##      y      n 
## 0.0109 0.9891

then compute P(H, T) = P(H|T)P(T) and from that P(H)

p.ht <- ar_prod(p.h_t, p.t)

P(H)

ar_marg(p.ht, "H")
## H
##      y      n 
## 0.0571 0.9429

question 3

P(F|H=y) Option 1: Brute force:

ar_dist(joint, marg="F", cond=list(H="y"))
## F
##     y     n 
## 0.121 0.879

Option 2: Be smart (avoid joint distribution)

lik <- ar_slice(p.h_t, slice=list(H="y"), as.array=T)
p <- ar_prod(p.t_f, lik, p.f)
p
##    T
## F          y        n
##   y 0.006930 0.000005
##   n 0.000693 0.049451
p.f.update <- ar_marg(p, "F")
p.f.update / sum(p.f.update)
## F
##     y     n 
## 0.121 0.879

question 4

ar_dist(joint, marg="F", cond=list(T="y"))
## F
##      y      n 
## 0.9091 0.0909

question 5

ar_dist(joint, marg="F", cond=list(T="y", H="y"))
## F
##      y      n 
## 0.9091 0.0909

Exercise (Mendel)

library(gRain)
gts <- c("yy", "yg", "gg")
gtprobs <- c(.25, .50, .25)

men <- mendel( c("y", "g"), names = c("ch", "fa", "mo") )
head( men, 10 )
##    ch fa mo prob
## 1  yy yy yy  1.0
## 2  yg yy yy  0.0
## 3  gg yy yy  0.0
## 4  yy yg yy  0.5
## 5  yg yg yy  0.5
## 6  gg yg yy  0.0
## 7  yy gg yy  0.0
## 8  yg gg yy  1.0
## 9  gg gg yy  0.0
## 10 yy yy yg  0.5
dim( men )
## [1] 27  4
## For later use, save inheritance probabilities
inheritance <- men$prob
head( inheritance )
## [1] 1.0 0.0 0.0 0.5 0.5 0.0
ssp <- list(fa = gts, mo = gts, ch = gts, ch1 = gts, ch2 = gts)

p.fa <- ar_new(~ fa, levels=ssp, values=gtprobs); p.fa
## fa
##   yy   yg   gg 
## 0.25 0.50 0.25
p.mo <- ar_new(~ mo, levels=ssp, values=gtprobs); p.mo
## mo
##   yy   yg   gg 
## 0.25 0.50 0.25
p.c1_fm <- ar_new(~ ch1:fa:mo, levels=ssp, values=inheritance)
p.c2_fm <- ar_new(~ ch2:fa:mo, levels=ssp, values=inheritance)

joint <- ar_prod(p.fa, p.mo, p.c1_fm, p.c2_fm)
joint  %>% round(2)  %>% ftable(col.vars = c("ch1", "ch2"))
##       ch1   yy             yg             gg          
##       ch2   yy   yg   gg   yy   yg   gg   yy   yg   gg
## fa mo                                                 
## yy yy     0.06 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
##    yg     0.03 0.03 0.00 0.03 0.03 0.00 0.00 0.00 0.00
##    gg     0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00
## yg yy     0.03 0.03 0.00 0.03 0.03 0.00 0.00 0.00 0.00
##    yg     0.02 0.03 0.02 0.03 0.06 0.03 0.02 0.03 0.02
##    gg     0.00 0.00 0.00 0.00 0.03 0.03 0.00 0.03 0.03
## gg yy     0.00 0.00 0.00 0.00 0.06 0.00 0.00 0.00 0.00
##    yg     0.00 0.00 0.00 0.00 0.03 0.03 0.00 0.03 0.03
##    gg     0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.06
ar_dist(joint, marg=~ch1)
## ch1
##   yy   yg   gg 
## 0.25 0.50 0.25
ar_dist(joint, marg=~ch1, cond=list(ch2="yy"))
## ch1
##     yy     yg     gg 
## 0.5625 0.3750 0.0625
ar_dist(joint, marg=~ch1, cond=list(ch2="yy", fa="yg", mo="yy"))
## ch1
##  yy  yg  gg 
## 0.5 0.5 0.0
ar_dist(joint, marg=~ch1, cond=list(fa="yg", mo="yy"))
## ch1
##  yy  yg  gg 
## 0.5 0.5 0.0
ar_dist(joint, marg=~ch1, cond=list(ch2="yy", mo="yy"))
## ch1
##   yy   yg   gg 
## 0.75 0.25 0.00
ar_dist(joint, marg=~ch1, cond=list(mo="yy"))
## ch1
##  yy  yg  gg 
## 0.5 0.5 0.0

Ann Nicholson

library(gRain)
yn <- c("y", "n")
ssp <- list(O=yn, L=yn, C=yn)
p.O <- ar_new(~O, levels=ssp, values=c(.6, .4))
p.L_O <- ar_new(~L:O, levels=ssp, values=c(.5, .5, .05, .95))
p.C_O <- ar_new(~C:O, levels=ssp, values=c(.8, .2, .1, .9))

cpt <- list(p.O, p.L_O, p.C_O)
bn <- grain( compileCPT(cpt))

library(Rgraphviz)
plot(bn)

plot of chunk unnamed-chunk-14

qgrain(bn)
## $O
## O
##   y   n 
## 0.6 0.4 
## 
## $L
## L
##    y    n 
## 0.32 0.68 
## 
## $C
## C
##    y    n 
## 0.52 0.48
qgrain(bn, evidence=list(C="y"))
## $O
## O
##      y      n 
## 0.9231 0.0769 
## 
## $L
## L
##     y     n 
## 0.465 0.535

Finding log-linear model for wine data

library(gRim)
data(wine, package="gRbase")

pc <- prcomp(wine[,-1], retx=T)
pairs(pc$x[,1:2], col=wine$Cult, lwd=2)

plot of chunk unnamed-chunk-15

To make tings fast, only look at a few variables

wine <- wine[, 1:10]
d <- lapply(wine[,-1], cut, breaks=2)
d <- as.data.frame(d)
head(d)
##          Alch         Mlca         Ash        Aloa       Mgns        Ttlp
## 1 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3]  (116,162] (2.43,3.88]
## 2 (12.9,14.8] (0.735,3.27] (1.36,2.29] (10.6,20.3] (69.9,116] (2.43,3.88]
## 3 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116] (2.43,3.88]
## 4 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116] (2.43,3.88]
## 5 (12.9,14.8] (0.735,3.27] (2.29,3.23]   (20.3,30]  (116,162] (2.43,3.88]
## 6 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116] (2.43,3.88]
##           Flvn          Nnfp         Prnt
## 1  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 2  (2.71,5.08] (0.129,0.395] (0.407,1.99]
## 3  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 4  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 5 (0.335,2.71] (0.129,0.395] (0.407,1.99]
## 6  (2.71,5.08] (0.129,0.395] (0.407,1.99]
wd <- cbind(Cult=wine[[1]], d)
head(wd)
##   Cult        Alch         Mlca         Ash        Aloa       Mgns
## 1   v1 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3]  (116,162]
## 2   v1 (12.9,14.8] (0.735,3.27] (1.36,2.29] (10.6,20.3] (69.9,116]
## 3   v1 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116]
## 4   v1 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116]
## 5   v1 (12.9,14.8] (0.735,3.27] (2.29,3.23]   (20.3,30]  (116,162]
## 6   v1 (12.9,14.8] (0.735,3.27] (2.29,3.23] (10.6,20.3] (69.9,116]
##          Ttlp         Flvn          Nnfp         Prnt
## 1 (2.43,3.88]  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 2 (2.43,3.88]  (2.71,5.08] (0.129,0.395] (0.407,1.99]
## 3 (2.43,3.88]  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 4 (2.43,3.88]  (2.71,5.08] (0.129,0.395]  (1.99,3.58]
## 5 (2.43,3.88] (0.335,2.71] (0.129,0.395] (0.407,1.99]
## 6 (2.43,3.88]  (2.71,5.08] (0.129,0.395] (0.407,1.99]
m <- dmod(~ .^., data=wd)

m2 <- stepwise(m, k=log(nrow(wine)), details=1)
## STEPWISE:  
##  criterion: aic ( k = 5.18 ) 
##  direction: backward 
##  type     : decomposable 
##  search   : all 
##  steps    : 1000 
## . BACKWARD: type=decomposable search=all, criterion=aic(5.18), alpha=0.00 
## . Initial model: is graphical=TRUE is decomposable=TRUE
##   change.AIC  -39.4975 Edge deleted: Aloa Nnfp
##   change.AIC  -31.2963 Edge deleted: Aloa Mgns
##   change.AIC  -32.6240 Edge deleted: Mlca Aloa
##   change.AIC  -28.6877 Edge deleted: Cult Nnfp
##   change.AIC  -31.5803 Edge deleted: Alch Nnfp
##   change.AIC  -28.5225 Edge deleted: Alch Aloa
##   change.AIC  -25.8954 Edge deleted: Alch Ash
##   change.AIC  -29.8651 Edge deleted: Mlca Alch
##   change.AIC  -24.6904 Edge deleted: Mgns Nnfp
##   change.AIC  -30.8020 Edge deleted: Ash Mgns
##   change.AIC  -23.1625 Edge deleted: Alch Mgns
##   change.AIC  -32.9210 Edge deleted: Cult Mgns
##   change.AIC  -22.1600 Edge deleted: Mlca Nnfp
##   change.AIC  -26.5104 Edge deleted: Ash Mlca
##   change.AIC  -20.7918 Edge deleted: Aloa Prnt
##   change.AIC  -17.9112 Edge deleted: Mlca Mgns
##   change.AIC  -16.8911 Edge deleted: Ttlp Aloa
##   change.AIC  -14.7997 Edge deleted: Flvn Aloa
##   change.AIC  -14.1973 Edge deleted: Prnt Nnfp
##   change.AIC  -15.4668 Edge deleted: Prnt Ash
##   change.AIC  -13.8401 Edge deleted: Alch Prnt
##   change.AIC  -12.9150 Edge deleted: Ttlp Alch
##   change.AIC  -11.2458 Edge deleted: Mlca Prnt
##   change.AIC  -11.9972 Edge deleted: Ttlp Mlca
##   change.AIC  -10.7326 Edge deleted: Mgns Prnt
##   change.AIC   -6.4552 Edge deleted: Flvn Mlca
##   change.AIC   -5.2069 Edge deleted: Cult Prnt
##   change.AIC   -4.7003 Edge deleted: Flvn Mgns
##   change.AIC   -4.1421 Edge deleted: Flvn Nnfp
##   change.AIC   -9.5940 Edge deleted: Ash Flvn
##   change.AIC   -3.3676 Edge deleted: Alch Flvn
##   change.AIC   -1.0881 Edge deleted: Ttlp Mgns
##   change.AIC   -0.5558 Edge deleted: Ash Nnfp
##   change.AIC  -14.0365 Edge deleted: Ash Ttlp
m3 <- stepwise(m, k=log(nrow(wine)), search="headlong", details=1)
## STEPWISE:  
##  criterion: aic ( k = 5.18 ) 
##  direction: backward 
##  type     : decomposable 
##  search   : headlong 
##  steps    : 1000 
## . BACKWARD: type=decomposable search=headlong, criterion=aic(5.18), alpha=0.00 
## . Initial model: is graphical=TRUE is decomposable=TRUE
##   change.AIC   -6.0225 Edge deleted: Ttlp Prnt
##   change.AIC   -4.7786 Edge deleted: Ttlp Flvn
##   change.AIC  -23.0178 Edge deleted: Ash Ttlp
##   change.AIC  -16.5669 Edge deleted: Cult Prnt
##   change.AIC  -29.4943 Edge deleted: Ttlp Nnfp
##   change.AIC  -15.1130 Edge deleted: Alch Prnt
##   change.AIC  -23.2670 Edge deleted: Mlca Ttlp
##   change.AIC  -28.7601 Edge deleted: Aloa Prnt
##   change.AIC  -22.3274 Edge deleted: Mgns Ttlp
##   change.AIC  -22.9263 Edge deleted: Nnfp Prnt
##   change.AIC  -26.2819 Edge deleted: Alch Mlca
##   change.AIC  -34.3837 Edge deleted: Cult Mlca
##   change.AIC  -17.0391 Edge deleted: Mlca Prnt
##   change.AIC  -27.9531 Edge deleted: Nnfp Mlca
##   change.AIC  -15.0044 Edge deleted: Alch Flvn
##   change.AIC  -21.0970 Edge deleted: Mgns Mlca
##   change.AIC  -10.5315 Edge deleted: Mgns Prnt
##   change.AIC  -32.0743 Edge deleted: Alch Ash
##   change.AIC  -30.5973 Edge deleted: Ash Mgns
##   change.AIC  -30.3944 Edge deleted: Alch Mgns
##   change.AIC  -15.0670 Edge deleted: Cult Ash
##   change.AIC  -14.6504 Edge deleted: Flvn Mgns
##   change.AIC  -17.2241 Edge deleted: Aloa Mlca
##   change.AIC  -17.4370 Edge deleted: Aloa Ttlp
##   change.AIC  -20.5598 Edge deleted: Alch Nnfp
##   change.AIC  -24.1292 Edge deleted: Aloa Mgns
##   change.AIC   -9.8839 Edge deleted: Alch Ttlp
##   change.AIC   -9.5932 Edge deleted: Ash Mlca
##   change.AIC  -14.7854 Edge deleted: Aloa Alch
##   change.AIC   -7.9679 Edge deleted: Ash Aloa
##   change.AIC  -15.1174 Edge deleted: Cult Mgns
##   change.AIC   -8.9419 Edge deleted: Cult Aloa
##   change.AIC   -7.6554 Edge deleted: Ash Prnt
##   change.AIC   -2.3723 Edge deleted: Flvn Ash
##   change.AIC   -2.6051 Edge deleted: Flvn Aloa
##   change.AIC   -7.4464 Edge deleted: Flvn Nnfp
##   change.AIC   -2.8007 Edge deleted: Mgns Nnfp
formula(m2)
## ~Cult * Ash * Aloa + Cult * Mlca + Ttlp * Flvn * Prnt + Cult * 
##     Ttlp * Flvn + Cult * Alch + Mgns + Ttlp * Nnfp
formula(m3)
## ~Cult * Ttlp + Mlca * Flvn + Cult * Alch + Flvn * Prnt + Ash * 
##     Nnfp + Aloa * Nnfp + Cult * Flvn + Cult * Nnfp + Mgns
plot(m2)

plot of chunk unnamed-chunk-16

plot(m3)

plot of chunk unnamed-chunk-16

bn2 <- grain(m2, smooth=.01)
bn3 <- grain(m3, smooth=.01)