|
| 1 | +# week 7 |
| 2 | +# varying effects, clusters and features, non-centering |
| 3 | + |
| 4 | +library(rethinking) |
| 5 | + |
| 6 | +# simple varying intercepts model |
| 7 | +library(rethinking) |
| 8 | +data(bangladesh) |
| 9 | +d <- bangladesh |
| 10 | + |
| 11 | + |
| 12 | +# visualize a and b distributions as independent gaussian |
| 13 | + |
| 14 | +blank2() |
| 15 | + |
| 16 | +library(ellipse) |
| 17 | +rho <- 0.5 |
| 18 | +SIGMA <- matrix( c(1,rho,rho,1) , 2 , 2 ) |
| 19 | +MU <- c( 0 , 0 ) |
| 20 | + |
| 21 | +# SIGMA <- rlkjcorr(1,2,eta=4) |
| 22 | + |
| 23 | +plot( NULL , xlim=c(-3,3) , ylim=c(-3,3) , xlab="a" , ylab="b" ) |
| 24 | +for ( l in seq(from=0.25,to=0.95,len=5) ) { |
| 25 | + el <- ellipse( SIGMA , centre=MU , level=l ) |
| 26 | + #lines( (el) , col=2 , lwd=3 ) |
| 27 | + polygon( (el) , col=col.alpha(2,0.25) , border=NA ) |
| 28 | +} |
| 29 | + |
| 30 | +Y <- rmvnorm(6,c(0,0),sigma=SIGMA) |
| 31 | +points( Y , lwd=4 , col="white" ) |
| 32 | +points( Y , lwd=3 , col=2 ) |
| 33 | + |
| 34 | +# lkjcorr prior predictive |
| 35 | + |
| 36 | +plot( NULL , xlim=c(-2.5,2.5) , ylim=c(-2.5,2.5) , xlab="a" , ylab="b" ) |
| 37 | +for ( i in 1:10 ) { |
| 38 | + RHO <- rlkjcorr(1,2,eta=4) |
| 39 | + s <- rexp(1,1) |
| 40 | + tau <- rexp(1,1) |
| 41 | + SIGMA <- diag(c(s,tau)) %*% RHO %*% diag(c(s,tau)) |
| 42 | + el <- ellipse( SIGMA , centre=MU , level=0.89 ) |
| 43 | + lines( (el) , col=col.alpha(2,0.5) , lwd=3 ) |
| 44 | + #polygon( (el) , col=col.alpha(2,0.25) , border=NA ) |
| 45 | +} |
| 46 | + |
| 47 | +SIGMA <- rlkjcorr(1e4,2,eta=4) |
| 48 | +dens(SIGMA[,1,2],lwd=3,col=2,xlab="correlation") |
| 49 | + |
| 50 | +########### |
| 51 | +# non-centered varying slopes with and without covariance |
| 52 | + |
| 53 | +dat <- list( |
| 54 | + C = d$use.contraception, |
| 55 | + D = as.integer(d$district), |
| 56 | + U = d$urban, |
| 57 | + A = standardize(d$age.centered), |
| 58 | + K = d$living.children ) |
| 59 | + |
| 60 | +# no covariance |
| 61 | +mCDUnc <- ulam( |
| 62 | + alist( |
| 63 | + C ~ bernoulli(p), |
| 64 | + logit(p) <- a[D] + b[D]*U, |
| 65 | + # define effects using other parameters |
| 66 | + save> vector[61]:a <<- abar + za*sigma, |
| 67 | + save> vector[61]:b <<- bbar + zb*tau, |
| 68 | + # z-scored effects |
| 69 | + vector[61]:za ~ normal(0,1), |
| 70 | + vector[61]:zb ~ normal(0,1), |
| 71 | + # ye olde hyper-priors |
| 72 | + c(abar,bbar) ~ normal(0,1), |
| 73 | + c(sigma,tau) ~ exponential(1) |
| 74 | + ) , data=dat , chains=4 , cores=4 ) |
| 75 | + |
| 76 | +# covariance - centered |
| 77 | +mCDUcov <- ulam( |
| 78 | + alist( |
| 79 | + C ~ bernoulli(p), |
| 80 | + logit(p) <- a[D] + b[D]*U, |
| 81 | + # define effects using other parameters |
| 82 | + transpars> vector[61]:a <<- v[,1], |
| 83 | + transpars> vector[61]:b <<- v[,2], |
| 84 | + # priors - centered correlated varying effects |
| 85 | + matrix[61,2]:v ~ multi_normal(abar,Rho,sigma), |
| 86 | + vector[2]:abar ~ normal(0,1), |
| 87 | + corr_matrix[2]:Rho ~ lkj_corr(4), |
| 88 | + vector[2]:sigma ~ exponential(1) |
| 89 | + ) , data=dat , chains=4 , cores=4 ) |
| 90 | + |
| 91 | +# covariance - non-centered |
| 92 | +mCDUcov_nc <- ulam( |
| 93 | + alist( |
| 94 | + C ~ bernoulli(p), |
| 95 | + logit(p) <- a[D] + b[D]*U, |
| 96 | + # define effects using other parameters |
| 97 | + # this is the non-centered Cholesky machine |
| 98 | + transpars> vector[61]:a <<- abar[1] + v[,1], |
| 99 | + transpars> vector[61]:b <<- abar[2] + v[,2], |
| 100 | + transpars> matrix[61,2]:v <- |
| 101 | + compose_noncentered( sigma , L_Rho , Z ), |
| 102 | + # priors - note that none have parameters inside them |
| 103 | + # that is what makes them non-centered |
| 104 | + matrix[2,61]:Z ~ normal( 0 , 1 ), |
| 105 | + vector[2]:abar ~ normal(0,1), |
| 106 | + cholesky_factor_corr[2]:L_Rho ~ lkj_corr_cholesky( 4 ), |
| 107 | + vector[2]:sigma ~ exponential(1), |
| 108 | + # convert Cholesky to Corr matrix |
| 109 | + gq> matrix[2,2]:Rho <<- Chol_to_Corr(L_Rho) |
| 110 | + ) , data=dat , chains=4 , cores=4 ) |
| 111 | + |
| 112 | +precis(mCDUcov_nc,3,pars=c("Rho","sigma")) |
| 113 | +precis(mCDUnc,3,pars=c("sigma","tau")) |
| 114 | + |
| 115 | +# posterior rho |
| 116 | +post <- extract.samples(mCDUcov_nc) |
| 117 | +dens( post$Rho[,1,2] , xlim=c(-1,1) , lwd=3 , col=2 , xlab="posterior correlation a,b" ) |
| 118 | +abline(v=0,lty=2,lwd=0.5) |
| 119 | +prior_rho <- rlkjcorr(1e4,2,eta=4) |
| 120 | +dens( prior_rho[,1,2] , lwd=2 , lty=2 , add=TRUE ) |
| 121 | + |
| 122 | +# posterior MVN of a,b |
| 123 | +plot( NULL , xlim=c(-2,1) , ylim=c(-1,2) , xlab="a" , ylab="b" ) |
| 124 | +abline(v=0,lty=2,lwd=0.5) |
| 125 | +abline(h=0,lty=2,lwd=0.5) |
| 126 | +SIGMA <- cov(cbind( apply(post$a,2,mean) , apply(post$b,2,mean) ) ) |
| 127 | +MU <- apply(post$abar,2,mean) |
| 128 | +for ( l in seq(from=0.25,to=0.95,len=5) ) { |
| 129 | + el <- ellipse( SIGMA , centre=MU , level=l ) |
| 130 | + #lines( (el) , col=2 , lwd=3 ) |
| 131 | + polygon( (el) , col=col.alpha(2,0.25) , border=NA ) |
| 132 | +} |
| 133 | + |
| 134 | +#points( apply(post$a,2,mean) , apply(post$b,2,mean) , col="white", lwd=3 ) |
| 135 | +points( apply(post$a,2,mean) , apply(post$b,2,mean) , col=1, lwd=2 ) |
| 136 | + |
| 137 | +post2 <- extract.samples(mCDUnc) |
| 138 | +points( apply(post2$a,2,mean) , apply(post2$b,2,mean) , col=1, lwd=2 ) |
| 139 | + |
| 140 | +# plot estimates |
| 141 | + |
| 142 | +Uval <- 1 |
| 143 | +xcol <- ifelse(Uval==0,2,4) |
| 144 | +p2 <- link( mCDUnc , data=list(D=1:61,U=rep(Uval,61)) ) |
| 145 | +#p2 <- link( mCDUcov_nc , data=list(D=1:61,U=rep(Uval,61)) ) |
| 146 | + |
| 147 | +# blank2(w=2,h=0.8) |
| 148 | +plot( NULL , xlab="district" , lwd=3 , col=2 , xlim=c(1,61), ylim=c(0,1) , ylab="prob use contraception" ) |
| 149 | +abline(h=0.5,lty=2,lwd=0.5) |
| 150 | + |
| 151 | +#points( 1:61 , apply(p,2,mean) , xlab="district" , lwd=3 , col=grau(0.8) , ylim=c(0,1) ) |
| 152 | + |
| 153 | +points( 1:61 , apply(p2,2,mean) , xlab="district" , lwd=3 , col=xcol , ylim=c(0,1) ) |
| 154 | + |
| 155 | +#for ( i in 1:61 ) lines( c(i,i) , PI(p2[,i]) , lwd=8 , col=col.alpha(xcol,0.5) ) |
| 156 | + |
| 157 | +# show other feature |
| 158 | +Uvalx <- 1-Uval |
| 159 | +xcolx <- ifelse(Uvalx==0,2,4) |
| 160 | +p2x <- link( mCDUcov_nc , data=list(D=1:61,U=rep(Uvalx,61)) ) |
| 161 | +points( 1:61 , apply(p2x,2,mean) , lwd=3 , col=xcolx ) |
| 162 | + |
| 163 | +# show raw proportions - have to skip 54 |
| 164 | +n <- table(dat$D,dat$U) |
| 165 | +Cn <- xtabs(dat$C ~ dat$D + dat$U) |
| 166 | +pC <- as.numeric( Cn[,Uval+1]/n[,Uval+1] ) |
| 167 | +pC <- c( pC[1:53] , NA , pC[54:60] ) |
| 168 | +#points( pC , lwd=2 ) |
| 169 | + |
| 170 | +# only some labels via locator |
| 171 | +nn <- as.numeric(n[,Uval+1]) |
| 172 | +nn <- c( nn[1:53] , 0 , nn[54:60] ) |
| 173 | +#identify( 1:61 , pC , labels=nn , cex=1 ) |
| 174 | + |
| 175 | + |
| 176 | + |
| 177 | + |
| 178 | +# shrinkage plot now |
| 179 | +# blank2(w=1) |
| 180 | + |
| 181 | +idx <- 34 |
| 182 | +idx <- 1:61 |
| 183 | + |
| 184 | + |
| 185 | +plot( NULL , xlab="prob C (rural)" , ylab="prob C (urban)" , xlim=c(0,1), ylim=c(0,1) ) |
| 186 | + |
| 187 | +plot( NULL , xlab="prob C (rural)" , ylab="prob C (urban)" , xlim=c(0.1,0.65), ylim=c(0.2,0.75) ) |
| 188 | + |
| 189 | +abline(h=0.5,lty=2,lwd=0.5) |
| 190 | +abline(v=0.5,lty=2,lwd=0.5) |
| 191 | + |
| 192 | +# point sizes proportional to smaple size in district |
| 193 | +n <- table(dat$D) |
| 194 | +n <- c( n[1:53] , 0 , n[54:60] ) |
| 195 | + |
| 196 | +# uncorrelated model |
| 197 | +post <- extract.samples(mCDUnc) |
| 198 | +logitp0 <- post$a |
| 199 | +logitp1 <- post$a + post$b |
| 200 | +p0 <- inv_logit(logitp0) |
| 201 | +p1 <- inv_logit(logitp1) |
| 202 | +#points( apply(p0,2,mean) , apply(p1,2,mean) , lwd=6 , col="white" ) |
| 203 | +points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=2 , col=1 ) |
| 204 | + |
| 205 | +# correlated model |
| 206 | +post <- extract.samples(mCDUcov_nc) |
| 207 | +logitp0 <- post$a |
| 208 | +logitp1 <- post$a + post$b |
| 209 | +p0 <- inv_logit(logitp0) |
| 210 | +p1 <- inv_logit(logitp1) |
| 211 | +points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=5 , col="white" ) |
| 212 | +points( apply(p0,2,mean)[idx] , apply(p1,2,mean)[idx] , lwd=3 , col=2 , cex=1 ) |
| 213 | + |
| 214 | + |
| 215 | + |
| 216 | +n <- table(dat$D,dat$U) |
| 217 | +Cn <- xtabs(dat$C ~ dat$D + dat$U) |
| 218 | +pC0 <- as.numeric( Cn[,1]/n[,1] ) |
| 219 | +pC1 <- as.numeric( Cn[,2]/n[,2] ) |
| 220 | + |
| 221 | +points( (pC0)[idx] , (pC1)[idx] , lwd=2 , cex=1 , pch=16 ) |
| 222 | +#points( (pC0) , (pC1) , lwd=2 , cex=2*n[,1]/100 + 0.5 ) |
| 223 | +#points( (pC0) , (pC1) , lwd=2 , cex=2*n[,2]/100 + 0.5 , col=4 ) |
| 224 | + |
| 225 | +p0x <- apply(p0,2,mean) |
| 226 | +p1x <- apply(p1,2,mean) |
| 227 | +for ( i in 1:61 ) { |
| 228 | + lines( c(pC0[i], p0x[i] ) , c(pC1[i], p1x[i] ) , col=grau() ) |
| 229 | +} |
| 230 | + |
| 231 | + |
| 232 | +# show raw proportions - have to skip 54 |
| 233 | +n <- table(dat$D) |
| 234 | +Cn <- xtabs(dat$C ~ dat$D) |
| 235 | +pC <- as.numeric( Cn/n ) |
| 236 | +pC <- c( pC[1:53] , NA , pC[54:60] ) |
| 237 | +points( pC , lwd=2 ) |
| 238 | + |
| 239 | +# only some labels via locator |
| 240 | +n <- table(dat$D) |
| 241 | +n <- as.numeric(n) |
| 242 | +n <- c( n[1:53] , 0 , n[54:60] ) |
| 243 | +identify( 1:61 , pC , labels=n , cex=1 ) |
| 244 | + |
| 245 | + |
| 246 | + |
| 247 | +#### |
| 248 | +# simple Cholesky factor example |
| 249 | + |
| 250 | +R <- matrix(c(1,0.6,0.6,1),2,2) |
| 251 | +L <- chol(R) |
0 commit comments