#### Application: Red State, Blue State, Rich State, Poor State

### Application 1: lmer practice: county, commuting zone, state

#install.packages("arm")
library(arm)

rm(list=ls())

# load the data
load('~/Dropbox/MLMWorkshop/mlmdata.Rdata')


# run a model with complete pooling
model1<-lm(repvote~state.ideology+socialcapital+top1+log.medianincome+unemployment+blackpop, data=data)

# compare it with state re
model2<-lmer(repvote~state.ideology+socialcapital+top1+log.medianincome+unemployment+blackpop+(1|state), data=data)

# state and cz re
model3<-lmer(repvote~state.ideology+socialcapital+top1+log.medianincome+unemployment+blackpop+(1|state)+(1|cz), data=data)

# varying intercept and varying slope
model4<-lmer(repvote~state.ideology+socialcapital+top1+log.medianincome+unemployment+blackpop+(1+log.medianincome|state)+(1|cz), data=data)


# compare results from four models
models<-c("model1", model2, model3, model4) 
Results<-array(NA, dim=c(7,8)) # set up results matrix
Results<-as.data.frame(Results) # make it as data frame 
rownames(Results)<-attributes(model1$coef)$names  # rownames as variable names
names(Results)<-c("pooling.coef", "pooling.T", "state.re.coef", "state.re.T", "state.cz.re.coef", "state.cz.re.T", "slope.coef", "slope.T") 
for(i in 1:length(models)){
  if (i==1){
  	Results[,i]<-summary(model1)$coef[,1]
  	Results[,i+1]<-summary(model1)$coef[,3]
  	}
  if (!i==1){
	Results[,2*(i-1)+1]<-fixef(models[i][[1]])
	Results[,2*i]<-fixef(models[i][[1]])/se.fixef(models[i][[1]])
	}
}

#### How explore the result
# model summary
summary(model3)
# estimated model within each state and CZ
coef(model3)
# to see only within state
coef(model3)$state
# check varying intercept and varying slope result
coef(model5)$state
# estimated model averaging over state 
fixef(model3)
# state-level errors
ranef(model3)$state
# standard errors of fixed effects
se.fixef(model3)
# standard errors of random effects
se.ranef(model3)

### Application 2: Replicating Gelman's model

rm(list=ls())

# load the data
load('~/Dropbox/MLMWorkshop/mlmdata2.Rdata')

# DV is vote choice 0, 1: glmer
### individual level income, state level mean income, and state random effect
model5<-glmer(repvote~income+meanincome+(1|state), data=data2, family="binomial"(link="logit"))

# We get an convergence error here. Let's try rescaling variables
data2$meanincome.rs<-rescale(data2$meanincome)
data2$income.rs<-rescale(data2$income)

model5<-glmer(repvote~income.rs+meanincome.rs+(1|state), data=data2, family="binomial"(link="logit"))

# summary of the result
summary(model5)
# state intercepts
coef(model5)

### Replicate Gelman's model using varying intercept and varying slope model
model6<-glmer(repvote~income.rs+(1+income.rs|state), data=data2, family="binomial"(link="logit"))

summary(model6) # not much information here
fixef(model6)
ranef(model6)

## Let's check if Gelman's hypothesis is correct. We will plot intercepts and slopes against state average income

results<-coef(model6)$state # save it as data frame
results$state<-rownames(results) # create state column
results$meanincome<-data2[match(results$state, data2$state), "meanincome"] # add state mean income variable
names(results)[1]<-"intercept" 

# Plot intercept vs. state income relationship
 p = ggplot(data = results, aes(x =meanincome, y = intercept)) 
 p +stat_smooth(method="loess", se=FALSE)+geom_text(aes(label=state),hjust=0,vjust=0, size=2.5)+theme_bw()+ylab("Intercept")+xlab("State Mean Income")+ggtitle("Intercept vs. State Income")

# Plot slope vs. state income 
 p = ggplot(data = results, aes(x =meanincome, y = income.rs)) 
 p +stat_smooth(method="loess", se=FALSE)+geom_text(aes(label=state),hjust=0,vjust=0, size=2.5)+theme_bw()+ylab("Slope")+xlab("State Mean Income")+ggtitle("Slope vs. State Income")

# Compare a plot with "no pooling" model and the model result
 p = ggplot(data = data2[data2$state=="CT"|data2$state=="OH"|data2$state=="MS"|data2$state=="CA",], aes(x =income, y = repvote, color=state))+stat_smooth(method="lm", se=FALSE, size=2)+theme_bw()
 p

results[results$state=="CT"|results$state=="OH"|results$state=="MS"|results$state=="CA",]


###### Without enough data points within groups, what happen to "no pooling model"?

# randomly drop 80% of the data
set.seed(12345)
randsel<-runif(nrow(data2))
data.dropped<-subset(data2, randsel>=.90)

# Check how much we lost
table(data2$state[data2$state=="CT"|data2$state=="OH"|data2$state=="MS"|data2$state=="CA"]) 
table(data.dropped$state[data.dropped$state=="CT"| data.dropped$state=="OH"| data.dropped$state=="MS"|data.dropped$state=="CA"]) 

# a plot with less data points
 q = ggplot(data = data.dropped[data.dropped$state=="CT"|data.dropped$state=="OH"|data.dropped$state=="MS"|data.dropped$state=="CA",], aes(x =income, y = repvote, color=state))+stat_smooth(method="lm", se=FALSE, size=2)+theme_bw()

# compare two plots 
 library(gridExtra)
 grid.arrange(p, q, ncol=2)


