Skip to content

Commit 379b377

Browse files
authored
lecture 14 script
1 parent 3eedab4 commit 379b377

File tree

1 file changed

+251
-0
lines changed

1 file changed

+251
-0
lines changed
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
# week 7
2+
# varying effects, clusters and features, non-centering
3+
4+
library(rethinking)
5+
6+
# simple varying intercepts model
7+
library(rethinking)
8+
data(bangladesh)
9+
d <- bangladesh
10+
11+
12+
# visualize a and b distributions as independent gaussian
13+
14+
blank2()
15+
16+
library(ellipse)
17+
rho <- 0.5
18+
SIGMA <- matrix( c(1,rho,rho,1) , 2 , 2 )
19+
MU <- c( 0 , 0 )
20+
21+
# SIGMA <- rlkjcorr(1,2,eta=4)
22+
23+
plot( NULL , xlim=c(-3,3) , ylim=c(-3,3) , xlab="a" , ylab="b" )
24+
for ( l in seq(from=0.25,to=0.95,len=5) ) {
25+
el <- ellipse( SIGMA , centre=MU , level=l )
26+
#lines( (el) , col=2 , lwd=3 )
27+
polygon( (el) , col=col.alpha(2,0.25) , border=NA )
28+
}
29+
30+
Y <- rmvnorm(6,c(0,0),sigma=SIGMA)
31+
points( Y , lwd=4 , col="white" )
32+
points( Y , lwd=3 , col=2 )
33+
34+
# lkjcorr prior predictive
35+
36+
plot( NULL , xlim=c(-2.5,2.5) , ylim=c(-2.5,2.5) , xlab="a" , ylab="b" )
37+
for ( i in 1:10 ) {
38+
RHO <- rlkjcorr(1,2,eta=4)
39+
s <- rexp(1,1)
40+
tau <- rexp(1,1)
41+
SIGMA <- diag(c(s,tau)) %*% RHO %*% diag(c(s,tau))
42+
el <- ellipse( SIGMA , centre=MU , level=0.89 )
43+
lines( (el) , col=col.alpha(2,0.5) , lwd=3 )
44+
#polygon( (el) , col=col.alpha(2,0.25) , border=NA )
45+
}
46+
47+
SIGMA <- rlkjcorr(1e4,2,eta=4)
48+
dens(SIGMA[,1,2],lwd=3,col=2,xlab="correlation")
49+
50+
###########
51+
# non-centered varying slopes with and without covariance
52+
53+
dat <- list(
54+
C = d$use.contraception,
55+
D = as.integer(d$district),
56+
U = d$urban,
57+
A = standardize(d$age.centered),
58+
K = d$living.children )
59+
60+
# no covariance
61+
mCDUnc <- ulam(
62+
alist(
63+
C ~ bernoulli(p),
64+
logit(p) <- a[D] + b[D]*U,
65+
# define effects using other parameters
66+
save> vector[61]:a <<- abar + za*sigma,
67+
save> vector[61]:b <<- bbar + zb*tau,
68+
# z-scored effects
69+
vector[61]:za ~ normal(0,1),
70+
vector[61]:zb ~ normal(0,1),
71+
# ye olde hyper-priors
72+
c(abar,bbar) ~ normal(0,1),
73+
c(sigma,tau) ~ exponential(1)
74+
) , data=dat , chains=4 , cores=4 )
75+
76+
# covariance - centered
77+
mCDUcov <- ulam(
78+
alist(
79+
C ~ bernoulli(p),
80+
logit(p) <- a[D] + b[D]*U,
81+
# define effects using other parameters
82+
transpars> vector[61]:a <<- v[,1],
83+
transpars> vector[61]:b <<- v[,2],
84+
# priors - centered correlated varying effects
85+
matrix[61,2]:v ~ multi_normal(abar,Rho,sigma),
86+
vector[2]:abar ~ normal(0,1),
87+
corr_matrix[2]:Rho ~ lkj_corr(4),
88+
vector[2]:sigma ~ exponential(1)
89+
) , data=dat , chains=4 , cores=4 )
90+
91+
# covariance - non-centered
92+
mCDUcov_nc <- ulam(
93+
alist(
94+
C ~ bernoulli(p),
95+
logit(p) <- a[D] + b[D]*U,
96+
# define effects using other parameters
97+
# this is the non-centered Cholesky machine
98+
transpars> vector[61]:a <<- abar[1] + v[,1],
99+
transpars> vector[61]:b <<- abar[2] + v[,2],
100+
transpars> matrix[61,2]:v <-
101+
compose_noncentered( sigma , L_Rho , Z ),
102+
# priors - note that none have parameters inside them
103+
# that is what makes them non-centered
104+
matrix[2,61]:Z ~ normal( 0 , 1 ),
105+
vector[2]:abar ~ normal(0,1),
106+
cholesky_factor_corr[2]:L_Rho ~ lkj_corr_cholesky( 4 ),
107+
vector[2]:sigma ~ exponential(1),
108+
# convert Cholesky to Corr matrix
109+
gq> matrix[2,2]:Rho <<- Chol_to_Corr(L_Rho)
110+
) , data=dat , chains=4 , cores=4 )
111+
112+
precis(mCDUcov_nc,3,pars=c("Rho","sigma"))
113+
precis(mCDUnc,3,pars=c("sigma","tau"))
114+
115+
# posterior rho
116+
post <- extract.samples(mCDUcov_nc)
117+
dens( post$Rho[,1,2] , xlim=c(-1,1) , lwd=3 , col=2 , xlab="posterior correlation a,b" )
118+
abline(v=0,lty=2,lwd=0.5)
119+
prior_rho <- rlkjcorr(1e4,2,eta=4)
120+
dens( prior_rho[,1,2] , lwd=2 , lty=2 , add=TRUE )
121+
122+
# posterior MVN of a,b
123+
plot( NULL , xlim=c(-2,1) , ylim=c(-1,2) , xlab="a" , ylab="b" )
124+
abline(v=0,lty=2,lwd=0.5)
125+
abline(h=0,lty=2,lwd=0.5)
126+
SIGMA <- cov(cbind( apply(post$a,2,mean) , apply(post$b,2,mean) ) )
127+
MU <- apply(post$abar,2,mean)
128+
for ( l in seq(from=0.25,to=0.95,len=5) ) {
129+
el <- ellipse( SIGMA , centre=MU , level=l )
130+
#lines( (el) , col=2 , lwd=3 )
131+
polygon( (el) , col=col.alpha(2,0.25) , border=NA )
132+
}
133+
134+
#points( apply(post$a,2,mean) , apply(post$b,2,mean) , col="white", lwd=3 )
135+
points( apply(post$a,2,mean) , apply(post$b,2,mean) , col=1, lwd=2 )
136+
137+
post2 <- extract.samples(mCDUnc)
138+
points( apply(post2$a,2,mean) , apply(post2$b,2,mean) , col=1, lwd=2 )
139+
140+
# plot estimates
141+
142+
Uval <- 1
143+
xcol <- ifelse(Uval==0,2,4)
144+
p2 <- link( mCDUnc , data=list(D=1:61,U=rep(Uval,61)) )
145+
#p2 <- link( mCDUcov_nc , data=list(D=1:61,U=rep(Uval,61)) )
146+
147+
# blank2(w=2,h=0.8)
148+
plot( NULL , xlab="district" , lwd=3 , col=2 , xlim=c(1,61), ylim=c(0,1) , ylab="prob use contraception" )
149+
abline(h=0.5,lty=2,lwd=0.5)
150+
151+
#points( 1:61 , apply(p,2,mean) , xlab="district" , lwd=3 , col=grau(0.8) , ylim=c(0,1) )
152+
153+
points( 1:61 , apply(p2,2,mean) , xlab="district" , lwd=3 , col=xcol , ylim=c(0,1) )
154+
155+
#for ( i in 1:61 ) lines( c(i,i) , PI(p2[,i]) , lwd=8 , col=col.alpha(xcol,0.5) )
156+
157+
# show other feature
158+
Uvalx <- 1-Uval
159+
xcolx <- ifelse(Uvalx==0,2,4)
160+
p2x <- link( mCDUcov_nc , data=list(D=1:61,U=rep(Uvalx,61)) )
161+
points( 1:61 , apply(p2x,2,mean) , lwd=3 , col=xcolx )
162+
163+
# show raw proportions - have to skip 54
164+
n <- table(dat$D,dat$U)
165+
Cn <- xtabs(dat$C ~ dat$D + dat$U)
166+
pC <- as.numeric( Cn[,Uval+1]/n[,Uval+1] )
167+
pC <- c( pC[1:53] , NA , pC[54:60] )
168+
#points( pC , lwd=2 )
169+
170+
# only some labels via locator
171+
nn <- as.numeric(n[,Uval+1])
172+
nn <- c( nn[1:53] , 0 , nn[54:60] )
173+
#identify( 1:61 , pC , labels=nn , cex=1 )
174+
175+
176+
177+
178+
# shrinkage plot now
179+
# blank2(w=1)
180+
181+
idx <- 34
182+
idx <- 1:61
183+
184+
185+
plot( NULL , xlab="prob C (rural)" , ylab="prob C (urban)" , xlim=c(0,1), ylim=c(0,1) )
186+
187+
plot( NULL , xlab="prob C (rural)" , ylab="prob C (urban)" , xlim=c(0.1,0.65), ylim=c(0.2,0.75) )
188+
189+
abline(h=0.5,lty=2,lwd=0.5)
190+
abline(v=0.5,lty=2,lwd=0.5)
191+
192+
# point sizes proportional to smaple size in district
193+
n <- table(dat$D)
194+
n <- c( n[1:53] , 0 , n[54:60] )
195+
196+
# uncorrelated model
197+
post <- extract.samples(mCDUnc)
198+
logitp0 <- post$a
199+
logitp1 <- post$a + post$b
200+
p0 <- inv_logit(logitp0)
201+
p1 <- inv_logit(logitp1)
202+
#points( apply(p0,2,mean) , apply(p1,2,mean) , lwd=6 , col="white" )
203+
points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=2 , col=1 )
204+
205+
# correlated model
206+
post <- extract.samples(mCDUcov_nc)
207+
logitp0 <- post$a
208+
logitp1 <- post$a + post$b
209+
p0 <- inv_logit(logitp0)
210+
p1 <- inv_logit(logitp1)
211+
points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=5 , col="white" )
212+
points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=3 , col=2 , cex=1 )
213+
214+
215+
216+
n <- table(dat$D,dat$U)
217+
Cn <- xtabs(dat$C ~ dat$D + dat$U)
218+
pC0 <- as.numeric( Cn[,1]/n[,1] )
219+
pC1 <- as.numeric( Cn[,2]/n[,2] )
220+
221+
points( (pC0)[idx] , (pC1)[idx] , lwd=2 , cex=1 , pch=16 )
222+
#points( (pC0) , (pC1) , lwd=2 , cex=2*n[,1]/100 + 0.5 )
223+
#points( (pC0) , (pC1) , lwd=2 , cex=2*n[,2]/100 + 0.5 , col=4 )
224+
225+
p0x <- apply(p0,2,mean)
226+
p1x <- apply(p1,2,mean)
227+
for ( i in 1:61 ) {
228+
lines( c(pC0[i], p0x[i] ) , c(pC1[i], p1x[i] ) , col=grau() )
229+
}
230+
231+
232+
# show raw proportions - have to skip 54
233+
n <- table(dat$D)
234+
Cn <- xtabs(dat$C ~ dat$D)
235+
pC <- as.numeric( Cn/n )
236+
pC <- c( pC[1:53] , NA , pC[54:60] )
237+
points( pC , lwd=2 )
238+
239+
# only some labels via locator
240+
n <- table(dat$D)
241+
n <- as.numeric(n)
242+
n <- c( n[1:53] , 0 , n[54:60] )
243+
identify( 1:61 , pC , labels=n , cex=1 )
244+
245+
246+
247+
####
248+
# simple Cholesky factor example
249+
250+
R <- matrix(c(1,0.6,0.6,1),2,2)
251+
L <- chol(R)

0 commit comments

Comments
 (0)