|
| 1 | +# week 7 |
| 2 | +# varying effects, clusters and features, non-centering |
| 3 | + |
| 4 | +library(rethinking) |
| 5 | + |
| 6 | +# simple varying intercepts model |
| 7 | +library(rethinking) |
| 8 | +data(bangladesh) |
| 9 | +d <- bangladesh |
| 10 | + |
| 11 | +dat <- list( |
| 12 | + C = d$use.contraception, |
| 13 | + D = as.integer(d$district) ) |
| 14 | + |
| 15 | +mCD <- ulam( |
| 16 | + alist( |
| 17 | + C ~ bernoulli(p), |
| 18 | + logit(p) <- a[D], |
| 19 | + vector[61]:a ~ normal(abar,sigma), |
| 20 | + abar ~ normal(0,1), |
| 21 | + sigma ~ exponential(1) |
| 22 | + ) , data=dat , chains=4 , cores=4 ) |
| 23 | + |
| 24 | + |
| 25 | +# plot estimates |
| 26 | +p <- link( mCD , data=list(D=1:61) ) |
| 27 | +# blank2(w=2) |
| 28 | +plot( NULL , xlab="district" , lwd=3 , col=2 , xlim=c(1,61), ylim=c(0,1) , ylab="prob use contraception" ) |
| 29 | + |
| 30 | +points( 1:61 , apply(p,2,mean) , xlab="district" , lwd=3 , col=2 , ylim=c(0,1) , ylab="prob use contraception" ) |
| 31 | + |
| 32 | + for ( i in 1:61 ) lines( c(i,i) , PI(p[,i]) , lwd=8 , col=col.alpha(2,0.5) ) |
| 33 | + |
| 34 | +# show raw proportions - have to skip 54 |
| 35 | +n <- table(dat$D) |
| 36 | +Cn <- xtabs(dat$C ~ dat$D) |
| 37 | +pC <- as.numeric( Cn/n ) |
| 38 | +pC <- c( pC[1:53] , NA , pC[54:60] ) |
| 39 | +points( pC , lwd=2 ) |
| 40 | + |
| 41 | +# only some labels via locator |
| 42 | +n <- table(dat$D) |
| 43 | +n <- as.numeric(n) |
| 44 | +n <- c( n[1:53] , 0 , n[54:60] ) |
| 45 | +identify( 1:61 , pC , labels=n , cex=1 ) |
| 46 | + |
| 47 | + |
| 48 | + |
| 49 | + |
| 50 | +##################### |
| 51 | +# add urban category |
| 52 | + |
| 53 | +dat <- list( |
| 54 | + C = d$use.contraception, |
| 55 | + D = as.integer(d$district), |
| 56 | + U = ifelse(d$urban==1,1,0) ) |
| 57 | + |
| 58 | +# total U |
| 59 | +mCDU <- ulam( |
| 60 | + alist( |
| 61 | + C ~ bernoulli(p), |
| 62 | + logit(p) <- a[D] + b[D]*U, |
| 63 | + vector[61]:a ~ normal(abar,sigma), |
| 64 | + vector[61]:b ~ normal(bbar,tau), |
| 65 | + c(abar,bbar) ~ normal(0,1), |
| 66 | + c(sigma,tau) ~ exponential(1) |
| 67 | + ) , data=dat , chains=4 , cores=4 ) |
| 68 | + |
| 69 | +traceplot(mCDU,pars="tau",lwd=2,n_cols=1) |
| 70 | +trankplot(mCDU,pars="tau",lwd=3,n_cols=1) |
| 71 | + |
| 72 | +# non-centered version |
| 73 | +mCDUnc <- ulam( |
| 74 | + alist( |
| 75 | + C ~ bernoulli(p), |
| 76 | + logit(p) <- a[D] + b[D]*U, |
| 77 | + # define effects using other parameters |
| 78 | + save> vector[61]:a <<- abar + za*sigma, |
| 79 | + save> vector[61]:b <<- bbar + zb*tau, |
| 80 | + # z-scored effects |
| 81 | + vector[61]:za ~ normal(0,1), |
| 82 | + vector[61]:zb ~ normal(0,1), |
| 83 | + # ye olde hyper-priors |
| 84 | + c(abar,bbar) ~ normal(0,1), |
| 85 | + c(sigma,tau) ~ exponential(1) |
| 86 | + ) , data=dat , chains=4 , cores=4 ) |
| 87 | + |
| 88 | +# plot estimates |
| 89 | + |
| 90 | +Uval <- 0 |
| 91 | +xcol <- ifelse(Uval==0,2,4) |
| 92 | +p <- link( mCDUnc , data=list(D=1:61,U=rep(Uval,61)) ) |
| 93 | +# blank2(w=2,h=0.8) |
| 94 | +plot( NULL , xlab="district" , lwd=3 , col=2 , xlim=c(1,61), ylim=c(0,1) , ylab="prob use contraception" ) |
| 95 | +abline(h=0.5,lty=2,lwd=0.5) |
| 96 | + |
| 97 | +points( 1:61 , apply(p,2,mean) , xlab="district" , lwd=3 , col=xcol , ylim=c(0,1) , ylab="prob use contraception" ) |
| 98 | + |
| 99 | + for ( i in 1:61 ) lines( c(i,i) , PI(p[,i]) , lwd=8 , col=col.alpha(xcol,0.5) ) |
| 100 | + |
| 101 | +# show raw proportions - have to skip 54 |
| 102 | +n <- table(dat$D,dat$U) |
| 103 | +Cn <- xtabs(dat$C ~ dat$D + dat$U) |
| 104 | +pC <- as.numeric( Cn[,Uval+1]/n[,Uval+1] ) |
| 105 | +pC <- c( pC[1:53] , NA , pC[54:60] ) |
| 106 | +points( pC , lwd=2 ) |
| 107 | + |
| 108 | +# only some labels via locator |
| 109 | +nn <- as.numeric(n[,Uval+1]) |
| 110 | +nn <- c( nn[1:53] , 0 , nn[54:60] ) |
| 111 | +identify( 1:61 , pC , labels=nn , cex=1 ) |
| 112 | + |
| 113 | +# show standard deviations |
| 114 | +post <- extract.samples(mCDUnc) |
| 115 | +dens(post$sigma,xlab="posterior standard deviation",lwd=3,col=2,xlim=c(0,1.2)) |
| 116 | +dens(post$tau,lwd=3,col=4,add=TRUE,adj=0.2) |
| 117 | +curve(dexp(x,1),from=0,to=1.3,add=TRUE,lwd=2,lty=2) |
| 118 | + |
| 119 | +#### |
| 120 | +# shrinkage plot now |
| 121 | +post <- extract.samples(mCDUnc) |
| 122 | +logitp0 <- post$a |
| 123 | +logitp1 <- post$a + post$b |
| 124 | + |
| 125 | +# blank2(w=1) |
| 126 | +#plot( NULL , xlab="log-odds C (rural)" , ylab="log-odds C (urban)" , xlim=c(-2,1), ylim=c(-1.5,1.5) ) |
| 127 | + |
| 128 | +plot( NULL , xlab="prob C (rural)" , ylab="prob C (urban)" , xlim=c(0.1,0.7), ylim=c(0.2,0.75) ) |
| 129 | +abline(h=0.5,lty=2,lwd=0.5) |
| 130 | +abline(v=0.5,lty=2,lwd=0.5) |
| 131 | + |
| 132 | +# plausibility ellipses |
| 133 | +library(ellipse) |
| 134 | +xxx <- sample(1:61,size=6) |
| 135 | +for ( i in xxx ) { |
| 136 | + SIGMA <- cov( cbind( logitp0[,i] , logitp1[,i] ) ) |
| 137 | + MU <- c( mean(logitp0[,i]) , mean(logitp1[,i]) ) |
| 138 | + el <- ellipse( SIGMA , centre=MU , level=0.5 ) |
| 139 | + lines( inv_logit(el) , col=col.alpha(2,0.3) , lwd=2 ) |
| 140 | + #polygon( inv_logit(el) , col=col.alpha(2,0.2) , border=NA ) |
| 141 | +} |
| 142 | + |
| 143 | +# posterior means |
| 144 | +p0 <- inv_logit(logitp0) |
| 145 | +p1 <- inv_logit(logitp1) |
| 146 | +points( apply(p0,2,mean) , apply(p1,2,mean) , lwd=6 , col="white" ) |
| 147 | +points( apply(p0,2,mean) , apply(p1,2,mean) , lwd=3 , col=2 ) |
| 148 | + |
| 149 | +n <- table(dat$D,dat$U) |
| 150 | +Cn <- xtabs(dat$C ~ dat$D + dat$U) |
| 151 | +pC0 <- as.numeric( Cn[,1]/n[,1] ) |
| 152 | +pC1 <- as.numeric( Cn[,2]/n[,2] ) |
| 153 | + |
| 154 | +points( (pC0) , (pC1) , lwd=2 , cex=2*apply(n,1,sum)/100 + 0.5 ) |
| 155 | + |
| 156 | +for ( i in 1:61 ) { |
| 157 | + lines( c(pC0[i],p0x[i])) , c(pC1[i],p1x[i]) ) |
| 158 | +} |
| 159 | + |
0 commit comments