To plot a graph, the Rgraphviz package must be loaded and this must unfortunately be done manually:
library(gRain)
library(Rgraphviz)
d <- dag(~ch|fa:mo + ch.pht|ch + mo.pht|mo + fa.pht|fa)
plot( d )
Conditional independence in the multivariate normal distribution
K <- matrix(c(2,1,0,
1,2,1,
0,1,2), 3, 3)
Sigma <- solve(K)
Sigma
## [,1] [,2] [,3]
## [1,] 0.75 -0.5 0.25
## [2,] -0.50 1.0 -0.50
## [3,] 0.25 -0.5 0.75
d <- data.frame(MASS::mvrnorm(100, mu=c(0,0,0), Sigma=Sigma))
Marginally, all variables are correlated:
pairs(d)
cor(d)
## X1 X2 X3
## X1 1.0000000 -0.4754942 0.3073193
## X2 -0.4754942 1.0000000 -0.5469225
## X3 0.3073193 -0.5469225 1.0000000
Regress X1 on X2 and X3 on X2 and look at the residuals
r1.2 <- resid( lm( X1 ~ X2, data=d) )
r3.2 <- resid( lm( X3 ~ X2, data=d) )
plot(r1.2, r3.2)
cor(r1.2, r3.2)
## [1] 0.06417072
The residuals are now uncorrelated so after adjusting for X2, any association between X1 and X3 has been removed. An alternative view is this: Regress X1 on X2 and X3:
coef( summary( lm( X1 ~ X2 + X3, data=d) ) )
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02473166 0.07332566 0.3372852 7.366306e-01
## X2 -0.36729498 0.08916068 -4.1194726 7.994972e-05
## X3 0.05964660 0.09418179 0.6333136 5.280193e-01
Cleary X1 does not depend on X3 once we know X2.
Getting the phenotype into the model
library(gRain)
library(Rgraphviz)
What we did in the tutorial
inheritance <- mendel( c("y","g"), names = c("ch", "fa", "mo") )$prob
head( inheritance )
## [1] 1.0 0.0 0.0 0.5 0.5 0.0
gts <- c("yy", "yg", "gg")
gtprobs <- c(.25, .50, .25)
ssp <- list(fa = gts, mo = gts, ch = gts)
p.c_fm <- arMk(~ch:fa:mo, levels=ssp, values=inheritance)
p.fa <- arMk(~fa, levels=ssp, values=gtprobs)
p.mo <- arMk(~mo, levels=ssp, values=gtprobs)
ftable( p.c_fm, row.vars = "ch" )
## fa yy yg gg
## mo yy yg gg yy yg gg yy yg gg
## ch
## yy 1.00 0.50 0.00 0.50 0.25 0.00 0.00 0.00 0.00
## yg 0.00 0.50 1.00 0.50 0.50 0.50 1.00 0.50 0.00
## gg 0.00 0.00 0.00 0.00 0.25 0.50 0.00 0.50 1.00
cptl <- compileCPT( list(p.fa, p.mo, p.c_fm ) ); cptl
## CPTspec with probabilities:
## P( fa )
## P( mo )
## P( ch | fa mo )
trio <- grain( cptl )
plot( trio )
We extend network with phenotypes (the colours that can be observed):
ptprobs <- c(1,0,1,0,0,1)
pt <- c("Y", "G")
ssp <- list(fa = gts, mo = gts, ch = gts, ch.pt=pt, fa.pt=pt, mo.pt=pt)
p.cpt <- arMk(~ch.pt:ch, levels=ssp, values=ptprobs)
p.fpt <- arMk(~fa.pt:fa, levels=ssp, values=ptprobs)
p.mpt <- arMk(~mo.pt:mo, levels=ssp, values=ptprobs)
A logical array:
p.cpt
## ch
## ch.pt yy yg gg
## Y 1 1 0
## G 0 0 1
Now we have
cptl2 <- compileCPT( list( p.cpt, p.fpt, p.mpt, p.fa, p.mo, p.c_fm ) ); cptl
## CPTspec with probabilities:
## P( fa )
## P( mo )
## P( ch | fa mo )
trio2 <- grain( cptl2 )
plot( trio2 )
Now we can condition on the phenotype
qgrain(trio2, nodes=c("ch","fa","mo"))
## $ch
## ch
## yy yg gg
## 0.25 0.50 0.25
##
## $fa
## fa
## yy yg gg
## 0.25 0.50 0.25
##
## $mo
## mo
## yy yg gg
## 0.25 0.50 0.25
When child is yellow, the genotype must be yy or yg
qgrain(trio2, evidence=list(ch.pt="Y"), nodes=c("ch","fa","mo"))
## $ch
## ch
## yy yg gg
## 0.3333333 0.6666667 0.0000000
##
## $fa
## fa
## yy yg gg
## 0.3333333 0.5000000 0.1666667
##
## $mo
## mo
## yy yg gg
## 0.3333333 0.5000000 0.1666667
We don't know which genotype it is, but yg is still twice as probable as yy - just as the case was before conditioning