Read In Files and Clean

# Directory that holds the input CSV files.
# NOTE(review): machine-specific absolute path; edit this one line to relocate the data.
data_dir = "C:/Users/onest/desktop/2012-and-2016-presidential-elections"

# Election results plus demographic columns; joined on the FIPS column below.
votes = read.csv(file.path(data_dir, "votes.csv"), header = TRUE)

religion = read.csv(file.path(data_dir, "religion.csv"), header = TRUE)
# Keep the join key (column 1) and columns 43-55, then give them readable names.
religions = religion[, c(1, 43:55)]
colnames(religions) = c("FIPS", "Total_Pop", "Evangelical", "Protestant",
                        "Historically_Black", "Catholic", "Jewish", "Mormon",
                        "Islamic", "Hindu", "Buddhist", "Orthodox",
                        "Jehovas_Witnesses", "Other_Religion")

# Left join: every row of `votes` is kept; rows without a FIPS match in
# `religions` get NA in the religion columns.
votes = merge(x = votes, y = religions, by = "FIPS", all.x = TRUE)

Exploratory Analysis - Maps

Basics

# Readable labels for the coded demographic columns of `votes`, keyed by the
# column position each label is written to. Positions not listed here keep
# the name they already have.
# NOTE(review): position 41 is labelled "White", the same name column 34
# already carries, so the data frame ends up with two "White" columns and
# name-based lookups are ambiguous - confirm what column 41 holds and give it
# a distinct label.
# NOTE(review): "Indidan" (position 64) looks like a typo for "Indian"; it is
# left untouched here because later code may refer to the column by this name.
column_labels = c(
  "29" = "Population 2012",
  "30" = "Persons Under 5",
  "31" = "Persons Under 18",
  "33" = "% Female 2014",
  "36" = "Indian and Alaskan Native",
  "37" = "Asian",
  "38" = "Native Hawaiian",
  "39" = "2+ Races",
  "41" = "White",
  "42" = "Living in Same House 1+ Years",
  "43" = "Foreign Born",
  "47" = "Veterans",
  "48" = "Travel Time to Work",
  "49" = "Housing Units 2014",
  "50" = "Homeownership Rate",
  "51" = "Housing Units in Multi-Unit Structures",
  "52" = "Median Value of Owner-Occupied Housing Units",
  "53" = "Households",
  "54" = "Persons/Household",
  "56" = "Median Household Income",
  "58" = "Private Nonfarm Establishments 2013",
  "59" = "Private Nonfarm Employment",
  "60" = "% Change - Private Nonfarm Employment",
  "61" = "Nonemployer Establishments - 2013",
  "62" = "Total Number of Firms",
  "63" = "Black-Owned Firms",
  "64" = "Indidan and Alaskan -Owned Firms",
  "65" = "Asian-Owned Firms",
  "66" = "Hawaiian-Owned Firms",
  "67" = "Hispanic-Owned Firms",
  "68" = "Women",
  "69" = "Manufacturers Shipments - 2007",
  "70" = "Merchant Wholesaler Sales - 2007",
  "71" = "Retail Sales - 2007",
  "72" = "Retail Sales / Capita - 2007",
  "73" = "Accommodation and Food Service Sales - 2007",
  "74" = "Building Permits",
  "75" = "Land Area (in sq miles)"
)
colnames(votes)[as.integer(names(column_labels))] = unname(column_labels)
# Print the full set of column names to check the result.
colnames(votes)
##  [1] "FIPS"                                        
##  [2] "X.1"                                         
##  [3] "X"                                           
##  [4] "combined_fips"                               
##  [5] "votes_dem_2016"                              
##  [6] "votes_gop_2016"                              
##  [7] "total_votes_2016"                            
##  [8] "Clinton"                                     
##  [9] "Trump"                                       
## [10] "diff_2016"                                   
## [11] "per_point_diff_2016"                         
## [12] "state_abbr"                                  
## [13] "county_name"                                 
## [14] "total_votes_2012"                            
## [15] "votes_dem_2012"                              
## [16] "votes_gop_2012"                              
## [17] "county_fips"                                 
## [18] "state_fips"                                  
## [19] "Obama"                                       
## [20] "Romney"                                      
## [21] "diff_2012"                                   
## [22] "per_point_diff_2012"                         
## [23] "fips"                                        
## [24] "area_name"                                   
## [25] "state_abbreviation"                          
## [26] "population2014"                              
## [27] "population2010"                              
## [28] "population_change"                           
## [29] "Population 2012"                             
## [30] "Persons Under 5"                             
## [31] "Persons Under 18"                            
## [32] "age65plus"                                   
## [33] "% Female 2014"                               
## [34] "White"                                       
## [35] "Black"                                       
## [36] "Indian and Alaskan Native"                   
## [37] "Asian"                                       
## [38] "Native Hawaiian"                             
## [39] "2+ Races"                                    
## [40] "Hispanic"                                    
## [41] "White"                                       
## [42] "Living in Same House 1+ Years"               
## [43] "Foreign Born"                                
## [44] "NonEnglish"                                  
## [45] "Edu_highschool"                              
## [46] "Edu_batchelors"                              
## [47] "Veterans"                                    
## [48] "Travel Time to Work"                         
## [49] "Housing Units 2014"                          
## [50] "Homeownership Rate"                          
## [51] "Housing Units in Multi-Unit Structures"      
## [52] "Median Value of Owner-Occupied Housing Units"
## [53] "Households"                                  
## [54] "Persons/Household"                           
## [55] "Income"                                      
## [56] "Median Household Income"                     
## [57] "Poverty"                                     
## [58] "Private Nonfarm Establishments 2013"         
## [59] "Private Nonfarm Employment"                  
## [60] "% Change - Private Nonfarm Employment"       
## [61] "Nonemployer Establishments - 2013"           
## [62] "Total Number of Firms"                       
## [63] "Black-Owned Firms"                           
## [64] "Indidan and Alaskan -Owned Firms"            
## [65] "Asian-Owned Firms"                           
## [66] "Hawaiian-Owned Firms"                        
## [67] "Hispanic-Owned Firms"                        
## [68] "Women"                                       
## [69] "Manufacturers Shipments - 2007"              
## [70] "Merchant Wholesaler Sales - 2007"            
## [71] "Retail Sales - 2007"                         
## [72] "Retail Sales / Capita - 2007"                
## [73] "Accommodation and Food Service Sales - 2007" 
## [74] "Building Permits"                            
## [75] "Land Area (in sq miles)"                     
## [76] "Density"                                     
## [77] "Clinton_Obama"                               
## [78] "Trump_Romney"                                
## [79] "Trump_Prediction"                            
## [80] "Clinton_Prediction"                          
## [81] "Trump_Deviation"                             
## [82] "Clinton_Deviation"                           
## [83] "Total_Pop"                                   
## [84] "Evangelical"                                 
## [85] "Protestant"                                  
## [86] "Historically_Black"                          
## [87] "Catholic"                                    
## [88] "Jewish"                                      
## [89] "Mormon"                                      
## [90] "Islamic"                                     
## [91] "Hindu"                                       
## [92] "Buddhist"                                    
## [93] "Orthodox"                                    
## [94] "Jehovas_Witnesses"                           
## [95] "Other_Religion"

Vote Share Plots

library("choroplethr")
## Warning: package 'choroplethr' was built under R version 3.3.3
## Loading required package: acs
## Warning: package 'acs' was built under R version 3.3.3
## Loading required package: stringr
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.3.3
## Loading required package: XML
## Warning: package 'XML' was built under R version 3.3.3
## 
## Attaching package: 'acs'
## The following object is masked from 'package:base':
## 
##     apply
library("choroplethrMaps")
## Warning: package 'choroplethrMaps' was built under R version 3.3.3
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.3.3
#Explore total votes per county
# Two-column input for the map: `region` (the FIPS column) and `value`
# (total 2016 votes). Columns are picked by name rather than by position.
total_votes = votes[, c("FIPS", "total_votes_2016")]
colnames(total_votes) = c("region", "value")

# State groups used to zoom the maps. Concatenated in this order they give
# the 48 states shown on the national map.
map_regions = list(
  new_england  = c("maine", "new hampshire", "vermont", "massachusetts",
                   "connecticut", "rhode island"),
  mid_atlantic = c("new york", "pennsylvania", "new jersey", "maryland",
                   "delaware"),
  south_east   = c("west virginia", "virginia", "tennessee", "kentucky",
                   "north carolina", "south carolina", "georgia", "florida",
                   "arkansas", "mississippi", "alabama", "louisiana"),
  mid_west     = c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas"),
  south_west   = c("texas", "oklahoma", "new mexico", "arizona"),
  west         = c("colorado", "wyoming", "montana", "idaho", "utah",
                   "nevada", "california", "oregon", "washington")
)
contiguous_us = unlist(map_regions, use.names = FALSE)
swing_states = c("new hampshire", "pennsylvania", "ohio", "michigan",
                 "north carolina", "florida", "arizona", "iowa", "nevada",
                 "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Build a county map of `total_votes` with 9 colour bins.
#   title  - plot title
#   states - lower-case state names to zoom to
# Returns the plot object produced by county_choropleth().
plot_vote_count = function(title, states) {
  county_choropleth(total_votes,
                    legend = "Votes",
                    num_colors = 9,
                    title = title,
                    state_zoom = states)
}

vote_count = plot_vote_count("Vote Count by County", contiguous_us)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
vote_count

#Break down county vote into regions of the US for easier viewing

##New England Region
vote_count_NE = plot_vote_count("Vote Count by County - New England",
                                map_regions$new_england)
vote_count_NE

##Mid-Atlantic Region
vote_count_MA = plot_vote_count("Vote Count by County - Mid-Atlantic",
                                map_regions$mid_atlantic)
vote_count_MA

##South East Region
vote_count_SE = plot_vote_count("Vote Count by County - South East",
                                map_regions$south_east)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
vote_count_SE

##Mid West Region
vote_count_MW = plot_vote_count("Vote Count by County - Mid-West",
                                map_regions$mid_west)
vote_count_MW

##South West Region
vote_count_SW = plot_vote_count("Vote Count by County - South West",
                                map_regions$south_west)
vote_count_SW

##West Region
vote_count_W = plot_vote_count("Vote Count by County - West",
                               map_regions$west)
vote_count_W

#Explore Vote Count by Swing States
vote_count_swing = plot_vote_count("Vote Count by County - Swing States",
                                   swing_states)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
vote_count_swing

Obama to Clinton (2012 to 2016 Change)

# Change in the Democratic vote count per county, 2016 minus 2012.
votes$change_dem_votes = votes$votes_dem_2016 - votes$votes_dem_2012

# Label each vote-count change with a direction and a size band.
#   change - numeric vector of vote changes (2016 minus 2012)
# Returns a character vector of the same length:
#   0                      -> "Equal"
#   0 < |change| < 1000    -> "<Gain|Loss> - Small (<1000)"
#   1000 <= |change| < 1e4 -> "<Gain|Loss> - Considerable (1000-10000)"
#   |change| >= 10000      -> "<Gain|Loss> - Large (>10000)"
#   NA                     -> NA
# Band edges are inclusive on the lower side, so changes of exactly 1000 or
# 10000 votes (either sign) land in a Gain/Loss band instead of "Equal".
classify_vote_change = function(change) {
  size_band = cut(abs(change),
                  breaks = c(0, 1000, 10000, Inf),
                  labels = c("Small (<1000)",
                             "Considerable (1000-10000)",
                             "Large (>10000)"),
                  right = FALSE,
                  include.lowest = TRUE)
  label = paste(ifelse(change > 0, "Gain", "Loss"), size_band, sep = " - ")
  # which() drops NA comparisons so they cannot be used as an index.
  label[which(change == 0)] = "Equal"
  # paste() turns missing input into the text "NA - NA"; restore a real NA.
  label[is.na(change)] = NA_character_
  label
}

# Map input: `region` (the FIPS column), the raw change, and its category in
# `value`. Columns are selected by name so the result does not depend on how
# many columns `votes` has.
dem_votes = votes[, c("FIPS", "change_dem_votes")]
colnames(dem_votes) = c("region", "votes")
dem_votes$value = classify_vote_change(dem_votes$votes)

# Fill colour for every Democratic change category, keyed by category label.
# Because the colours are named, a category keeps its colour on every map
# even when a region contains only some of the categories.
dem_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "blue",
                      "Gain - Large (>10000)"            = "navy",
                      "Gain - Small (<1000)"             = "deepskyblue",
                      "Loss - Considerable (1000-10000)" = "red",
                      "Loss - Large (>10000)"            = "darkred",
                      "Loss - Small (<1000)"             = "lightpink")

# State groups used to zoom the maps. Concatenated in this order they give
# the 48 states shown on the national map.
map_regions = list(
  new_england  = c("maine", "new hampshire", "vermont", "massachusetts",
                   "connecticut", "rhode island"),
  mid_atlantic = c("new york", "pennsylvania", "new jersey", "maryland",
                   "delaware"),
  south_east   = c("west virginia", "virginia", "tennessee", "kentucky",
                   "north carolina", "south carolina", "georgia", "florida",
                   "arkansas", "mississippi", "alabama", "louisiana"),
  mid_west     = c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas"),
  south_west   = c("texas", "oklahoma", "new mexico", "arizona"),
  west         = c("colorado", "wyoming", "montana", "idaho", "utah",
                   "nevada", "california", "oregon", "washington")
)
contiguous_us = unlist(map_regions, use.names = FALSE)
swing_states = c("new hampshire", "pennsylvania", "ohio", "michigan",
                 "north carolina", "florida", "arizona", "iowa", "nevada",
                 "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

# Shared start of the regional titles.
dem_title = "Change from Obama to Clinton (Blue = Better Clinton) - "

dem_change_US = render_change_map(
  dem_votes,
  "Change from Obama to Clinton where Blue Represents better Clinton Performance",
  contiguous_us,
  dem_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_US

#Break down county vote into regions of the US for easier viewing

##New England Region
dem_change_NE = render_change_map(dem_votes, paste0(dem_title, "New England"),
                                  map_regions$new_england, dem_change_colors)
dem_change_NE

##Mid-Atlantic Region
dem_change_MA = render_change_map(dem_votes, paste0(dem_title, "Mid-Atlantic"),
                                  map_regions$mid_atlantic, dem_change_colors)
dem_change_MA

##South East Region
dem_change_SE = render_change_map(dem_votes, paste0(dem_title, "South East"),
                                  map_regions$south_east, dem_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_SE

##Mid West Region
dem_change_MW = render_change_map(dem_votes, paste0(dem_title, "Mid West"),
                                  map_regions$mid_west, dem_change_colors)
dem_change_MW

##South West Region
dem_change_SW = render_change_map(dem_votes, paste0(dem_title, "South West"),
                                  map_regions$south_west, dem_change_colors)
dem_change_SW

##West Region
# This regional map also zooms to Alaska and Hawaii, which the national map
# leaves out.
dem_change_W = render_change_map(dem_votes, paste0(dem_title, "West"),
                                 c(map_regions$west, "alaska", "hawaii"),
                                 dem_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
dem_change_W

#Explore change in Democratic votes by Swing States
dem_change_swing = render_change_map(dem_votes, paste0(dem_title, "Swing States"),
                                     swing_states, dem_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_swing

Romney to Trump (2012 to 2016 Change)

# Change in the Republican vote count per county, 2016 minus 2012.
votes$change_rep_votes = votes$votes_gop_2016 - votes$votes_gop_2012

# Label each vote-count change with a direction and a size band.
#   change - numeric vector of vote changes (2016 minus 2012)
# Returns a character vector of the same length:
#   0                      -> "Equal"
#   0 < |change| < 1000    -> "<Gain|Loss> - Small (<1000)"
#   1000 <= |change| < 1e4 -> "<Gain|Loss> - Considerable (1000-10000)"
#   |change| >= 10000      -> "<Gain|Loss> - Large (>10000)"
#   NA                     -> NA
# Band edges are inclusive on the lower side, so changes of exactly 1000 or
# 10000 votes (either sign) land in a Gain/Loss band instead of "Equal".
classify_vote_change = function(change) {
  size_band = cut(abs(change),
                  breaks = c(0, 1000, 10000, Inf),
                  labels = c("Small (<1000)",
                             "Considerable (1000-10000)",
                             "Large (>10000)"),
                  right = FALSE,
                  include.lowest = TRUE)
  label = paste(ifelse(change > 0, "Gain", "Loss"), size_band, sep = " - ")
  # which() drops NA comparisons so they cannot be used as an index.
  label[which(change == 0)] = "Equal"
  # paste() turns missing input into the text "NA - NA"; restore a real NA.
  label[is.na(change)] = NA_character_
  label
}

# Map input: `region` (the FIPS column), the raw change, and its category in
# `value`. Columns are selected by name so the result does not depend on how
# many columns `votes` has.
rep_votes = votes[, c("FIPS", "change_rep_votes")]
colnames(rep_votes) = c("region", "votes")
rep_votes$value = classify_vote_change(rep_votes$votes)

# Fill colour for every Republican change category, keyed by category label
# (gains in reds, losses in blues). Because the colours are named, a category
# keeps its colour on every map even when a region contains only some of the
# categories.
rep_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "red",
                      "Gain - Large (>10000)"            = "darkred",
                      "Gain - Small (<1000)"             = "lightpink",
                      "Loss - Considerable (1000-10000)" = "blue",
                      "Loss - Large (>10000)"            = "navy",
                      "Loss - Small (<1000)"             = "deepskyblue")

# State groups used to zoom the maps. Concatenated in this order they give
# the 48 states shown on the national map.
map_regions = list(
  new_england  = c("maine", "new hampshire", "vermont", "massachusetts",
                   "connecticut", "rhode island"),
  mid_atlantic = c("new york", "pennsylvania", "new jersey", "maryland",
                   "delaware"),
  south_east   = c("west virginia", "virginia", "tennessee", "kentucky",
                   "north carolina", "south carolina", "georgia", "florida",
                   "arkansas", "mississippi", "alabama", "louisiana"),
  mid_west     = c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas"),
  south_west   = c("texas", "oklahoma", "new mexico", "arizona"),
  west         = c("colorado", "wyoming", "montana", "idaho", "utah",
                   "nevada", "california", "oregon", "washington")
)
contiguous_us = unlist(map_regions, use.names = FALSE)
swing_states = c("new hampshire", "pennsylvania", "ohio", "michigan",
                 "north carolina", "florida", "arizona", "iowa", "nevada",
                 "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

# Shared start of the regional titles.
rep_title = "Change from Romney to Trump (Red = Better Trump) - "

rep_change_US = render_change_map(
  rep_votes,
  "Change from Romney to Trump where Red Represents better Trump Performance",
  contiguous_us,
  rep_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_US

#Break down county vote into regions of the US for easier viewing

##New England Region
rep_change_NE = render_change_map(rep_votes, paste0(rep_title, "New England"),
                                  map_regions$new_england, rep_change_colors)
rep_change_NE

##Mid-Atlantic Region
rep_change_MA = render_change_map(rep_votes, paste0(rep_title, "Mid-Atlantic"),
                                  map_regions$mid_atlantic, rep_change_colors)
rep_change_MA

##South East Region
rep_change_SE = render_change_map(rep_votes, paste0(rep_title, "South East"),
                                  map_regions$south_east, rep_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_SE

##Mid West Region
rep_change_MW = render_change_map(rep_votes, paste0(rep_title, "Mid West"),
                                  map_regions$mid_west, rep_change_colors)
rep_change_MW

##South West Region
rep_change_SW = render_change_map(rep_votes, paste0(rep_title, "South West"),
                                  map_regions$south_west, rep_change_colors)
rep_change_SW

##West Region
# This regional map also zooms to Alaska and Hawaii, which the national map
# leaves out.
rep_change_W = render_change_map(rep_votes, paste0(rep_title, "West"),
                                 c(map_regions$west, "alaska", "hawaii"),
                                 rep_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
rep_change_W

#Explore change in Republican votes by Swing States
rep_change_swing = render_change_map(rep_votes, paste0(rep_title, "Swing States"),
                                     swing_states, rep_change_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_swing

#Interesting State Examination

Examine Wisconsin

# Category -> fill colour, keyed by label so a category has the same colour
# here as on the national maps, whichever categories occur in this state.
dem_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "blue",
                      "Gain - Large (>10000)"            = "navy",
                      "Gain - Small (<1000)"             = "deepskyblue",
                      "Loss - Considerable (1000-10000)" = "red",
                      "Loss - Large (>10000)"            = "darkred",
                      "Loss - Small (<1000)"             = "lightpink")
rep_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "red",
                      "Gain - Large (>10000)"            = "darkred",
                      "Gain - Small (<1000)"             = "lightpink",
                      "Loss - Considerable (1000-10000)" = "blue",
                      "Loss - Large (>10000)"            = "navy",
                      "Loss - Small (<1000)"             = "deepskyblue")

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

#Clinton
dem_change_WI = render_change_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Wisconsin",
  "wisconsin",
  dem_change_colors)
dem_change_WI

#Trump
rep_change_WI = render_change_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Wisconsin",
  "wisconsin",
  rep_change_colors)
rep_change_WI

#There appear to be more significant losses for Clinton than gains for Trump

Examine Texas

# Category -> fill colour, keyed by label so a category has the same colour
# here as on the national maps, whichever categories occur in this state.
dem_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "blue",
                      "Gain - Large (>10000)"            = "navy",
                      "Gain - Small (<1000)"             = "deepskyblue",
                      "Loss - Considerable (1000-10000)" = "red",
                      "Loss - Large (>10000)"            = "darkred",
                      "Loss - Small (<1000)"             = "lightpink")
rep_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "red",
                      "Gain - Large (>10000)"            = "darkred",
                      "Gain - Small (<1000)"             = "lightpink",
                      "Loss - Considerable (1000-10000)" = "blue",
                      "Loss - Large (>10000)"            = "navy",
                      "Loss - Small (<1000)"             = "deepskyblue")

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

#Clinton
dem_change_TX = render_change_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Texas",
  "texas",
  dem_change_colors)
dem_change_TX

#Trump
rep_change_TX = render_change_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Texas",
  "texas",
  rep_change_colors)
rep_change_TX

Examine Arizona

# Category -> fill colour, keyed by label so a category has the same colour
# here as on the national maps, whichever categories occur in this state.
dem_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "blue",
                      "Gain - Large (>10000)"            = "navy",
                      "Gain - Small (<1000)"             = "deepskyblue",
                      "Loss - Considerable (1000-10000)" = "red",
                      "Loss - Large (>10000)"            = "darkred",
                      "Loss - Small (<1000)"             = "lightpink")
rep_change_colors = c("Equal"                            = "white",
                      "Gain - Considerable (1000-10000)" = "red",
                      "Gain - Large (>10000)"            = "darkred",
                      "Gain - Small (<1000)"             = "lightpink",
                      "Loss - Considerable (1000-10000)" = "blue",
                      "Loss - Large (>10000)"            = "navy",
                      "Loss - Small (<1000)"             = "deepskyblue")

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

#Clinton
dem_change_AZ = render_change_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Arizona",
  "arizona",
  dem_change_colors)
dem_change_AZ

#Trump
rep_change_AZ = render_change_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Arizona",
  "arizona",
  rep_change_colors)
rep_change_AZ

Percentage Shift from 2012

# Net shift per county: Trump_Romney minus Clinton_Obama. Positive values are
# labelled as a shift toward the GOP below, negative values toward the Democrats.
# NOTE(review): presumably both inputs are changes in vote share (fractions,
# not percentage points) - confirm where those columns are built.
votes$per_shift = votes$Trump_Romney - votes$Clinton_Obama

# Selected by name so the result does not depend on how many columns `votes` has.
shift = votes[, c("FIPS", "per_shift")]

summary(shift$per_shift)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.41210  0.04355  0.10060  0.10620  0.17450  0.46760
# Single box plot of the shift across all counties; this has to run before
# the columns are renamed below because it refers to `per_shift`.
ggplot(shift, aes(x = "distribution", y = per_shift)) +
  geom_boxplot(fill = "firebrick", colour = "darkblue") +
  ggtitle("County Shifts toward Republican from 2012 to 2016") +
  ylab("Percentage Shift toward Republican from 2012 to 2016")

# Label each shift with the party it favours and a size band.
#   per_shift - numeric vector of shifts (positive = toward GOP)
# Returns a character vector of the same length:
#   0                     -> "Equal"
#   0 < |shift| < 0.05    -> "<GOP|Dem> - Small (<5%)"
#   0.05 <= |shift| < 0.1 -> "<GOP|Dem> - Considerable (<10%)"
#   |shift| >= 0.1        -> "<GOP|Dem> - Large (>10%)"
#   NA                    -> NA
# Band edges are inclusive on the lower side, so shifts of exactly 0.05 or
# 0.10 (either sign) land in a GOP/Dem band instead of "Equal".
classify_shift = function(per_shift) {
  size_band = cut(abs(per_shift),
                  breaks = c(0, .05, .10, Inf),
                  labels = c("Small (<5%)",
                             "Considerable (<10%)",
                             "Large (>10%)"),
                  right = FALSE,
                  include.lowest = TRUE)
  label = paste(ifelse(per_shift > 0, "GOP", "Dem"), size_band, sep = " - ")
  # which() drops NA comparisons so they cannot be used as an index.
  label[which(per_shift == 0)] = "Equal"
  # paste() turns missing input into the text "NA - NA"; restore a real NA.
  label[is.na(per_shift)] = NA_character_
  label
}

# Map input: `region` (the FIPS column), the raw shift, and its category in `value`.
colnames(shift) = c("region", "shift")
shift$value = classify_shift(shift$shift)

# Fill colour for every shift category, keyed by category label (Democratic
# shifts in blues, GOP shifts in reds). Named colours keep a category's colour
# stable across maps, and the "Equal" entry means a county with no shift no
# longer leaves the scale one colour short.
shift_colors = c("Dem - Considerable (<10%)" = "blue",
                 "Dem - Large (>10%)"        = "navy",
                 "Dem - Small (<5%)"         = "deepskyblue",
                 "Equal"                     = "white",
                 "GOP - Considerable (<10%)" = "red",
                 "GOP - Large (>10%)"        = "darkred",
                 "GOP - Small (<5%)"         = "lightpink")

# State groups used to zoom the maps. Concatenated in this order they give
# the 48 states shown on the national map.
map_regions = list(
  new_england  = c("maine", "new hampshire", "vermont", "massachusetts",
                   "connecticut", "rhode island"),
  mid_atlantic = c("new york", "pennsylvania", "new jersey", "maryland",
                   "delaware"),
  south_east   = c("west virginia", "virginia", "tennessee", "kentucky",
                   "north carolina", "south carolina", "georgia", "florida",
                   "arkansas", "mississippi", "alabama", "louisiana"),
  mid_west     = c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas"),
  south_west   = c("texas", "oklahoma", "new mexico", "arizona"),
  west         = c("colorado", "wyoming", "montana", "idaho", "utah",
                   "nevada", "california", "oregon", "washington")
)
contiguous_us = unlist(map_regions, use.names = FALSE)

# Render a county map of a categorical `value` column.
#   df     - data frame passed to CountyChoropleth$new()
#   title  - plot title
#   states - lower-case state names to zoom to
#   colors - named vector mapping category label -> fill colour
# Returns the rendered plot with the legend placed on the right.
render_change_map = function(df, title, states, colors) {
  map = CountyChoropleth$new(df)
  map$title = title
  map$add_state_outline = TRUE
  map$legend = "Change in Votes"
  # Kept from the original calls; the fill comes from the manual scale below,
  # so this presumably has no visible effect - TODO confirm.
  map$set_num_colors(length(colors))
  map$ggplot_scale = scale_fill_manual(values = colors)
  map$set_zoom(states)
  map$render() + theme(legend.position = "right")
}

# Shared start of the regional titles.
shift_title = "Percentage Shift from 2012 to 2016 - "

#Entire country
per_change_US = render_change_map(shift,
                                  "Shift from 2012 to 2016 by County Percentage",
                                  contiguous_us, shift_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_US

#Break down county vote into regions of the US for easier viewing

##New England Region
per_change_NE = render_change_map(shift, paste0(shift_title, "New England"),
                                  map_regions$new_england, shift_colors)
per_change_NE

##Mid-Atlantic Region
per_change_MA = render_change_map(shift, paste0(shift_title, "Mid-Atlantic"),
                                  map_regions$mid_atlantic, shift_colors)
per_change_MA

##South East Region
per_change_SE = render_change_map(shift, paste0(shift_title, "South East"),
                                  map_regions$south_east, shift_colors)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_SE

##Mid West Region
c = CountyChoropleth$new(shift)
c$title = "Percentage Shift from 2012 to 2016 - Mid West"
c$add_state_outline = TRUE
c$legend = "Change in Votes"
c$set_num_colors(6)
c$ggplot_scale = scale_fill_manual(values = c("blue","navy","deepskyblue","red","darkred","lightpink"))
c$set_zoom(c("ohio","michigan","indiana","illinois","wisconsin","minnesota","iowa","missouri","north dakota","south dakota","nebraska","kansas"))
per_change_MW = c$render() + 
              theme(legend.position = "right")
per_change_MW

##South West Region
c = CountyChoropleth$new(shift)
c$title = "Percentage Shift from 2012 to 2016 - South West"
c$add_state_outline = TRUE
c$legend = "Change in Votes"
c$set_num_colors(6)
c$ggplot_scale = scale_fill_manual(values = c("blue","navy","deepskyblue","red","darkred","lightpink"))
c$set_zoom(c("texas","oklahoma","new mexico","arizona"))
per_change_SW = c$render() + 
              theme(legend.position = "right")
per_change_SW

##West Region
c = CountyChoropleth$new(shift)
c$title = "Percentage Shift from 2012 to 2016 - West"
c$add_state_outline = TRUE
c$legend = "Change in Votes"
c$set_num_colors(6)
c$ggplot_scale = scale_fill_manual(values = c("blue","navy","deepskyblue","red","darkred","lightpink"))
c$set_zoom(c("colorado","wyoming","montana","idaho","utah","nevada","california","oregon","washington","alaska","hawaii"))
per_change_W = c$render() + 
              theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
per_change_W

#Explore Vote Count by Swing States
c = CountyChoropleth$new(shift)
c$title = "Percentage Shift from 2012 to 2016 - Swing States"
c$add_state_outline = TRUE
c$legend = "Change in Votes"
c$set_num_colors(6)
c$ggplot_scale = scale_fill_manual(values = c("blue","navy","deepskyblue","red","darkred","lightpink"))
c$set_zoom(c("new hampshire","pennsylvania","ohio","michigan","north carolina","florida","arizona","iowa","nevada","wisconsin","virginia","colorado","minnesota","maine"))
per_change_swing = c$render() + 
              theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_swing

Analyze shifts at county level

# Combine the per-county Democratic and Republican tables (full outer join on
# `region`) and add an empty `value` column for the category label.
total_shift <- merge(x = dem_votes, y = rep_votes, by = "region", all = TRUE)
total_shift[, 6] <- NA
colnames(total_shift) <- c("region", "dem_votes", "dem_gain_loss",
                           "rep_votes", "rep_gain_loss", "value")

# Label every row "Dem <Loss|Gain|Equal>/GOP <Loss|Gain|Equal>" from the sign
# of column 2 (dem_votes) and column 4 (rep_votes).
# NOTE(review): the signs tested are those of the *_votes columns, not the
# *_gain_loss columns - presumably the *_votes columns hold the 2012-to-2016
# change; confirm against where dem_votes / rep_votes are built.
# The loop starts at row 1 explicitly rather than depending on whatever value
# `i` was left at by earlier code.
for (i in seq_len(nrow(total_shift))) {
  dem_change <- total_shift[i, 2]
  gop_change <- total_shift[i, 4]

  # The outer join can leave one side missing; leave such rows unlabelled
  # (value stays NA) instead of stopping on an NA comparison.
  if (is.na(dem_change) || is.na(gop_change)) {
    next
  }

  if (dem_change < 0) {
    dem_label <- "Dem Loss"
  } else if (dem_change > 0) {
    dem_label <- "Dem Gain"
  } else {
    dem_label <- "Dem Equal"
  }

  if (gop_change < 0) {
    gop_label <- "GOP Loss"
  } else if (gop_change > 0) {
    gop_label <- "GOP Gain"
  } else {
    gop_label <- "GOP Equal"
  }

  total_shift[i, 6] <- paste(dem_label, gop_label, sep = "/")
}

# County maps of the gain/loss category in `total_shift`.
# The maps share outline and legend settings; title, zoom and palette vary.
# Each palette has as many colours as the number passed to set_num_colors().

# State groups used for zooming.
zoom_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                      "connecticut", "rhode island")
zoom_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                       "delaware")
zoom_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                     "north carolina", "south carolina", "georgia", "florida",
                     "arkansas", "mississippi", "alabama", "louisiana")
zoom_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas")
zoom_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
zoom_west <- c("colorado", "wyoming", "montana", "idaho", "utah", "nevada",
               "california", "oregon", "washington", "alaska", "hawaii")
# The country-wide map shows every group above except Alaska and Hawaii.
zoom_lower_48 <- c(zoom_new_england, zoom_mid_atlantic, zoom_south_east,
                   zoom_mid_west, zoom_south_west,
                   setdiff(zoom_west, c("alaska", "hawaii")))
zoom_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                "north carolina", "florida", "arizona", "iowa", "nevada",
                "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Configure (without rendering) a county choropleth of `total_shift`.
#   title:   plot title
#   zoom:    character vector of state names passed to set_zoom()
#   palette: fill colours; its length is also used as the number of colours
# Returns the configured CountyChoropleth object.
vote_shift_map <- function(title, zoom, palette) {
  map <- CountyChoropleth$new(total_shift)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- "Change in Votes"
  map$set_num_colors(length(palette))
  map$ggplot_scale <- scale_fill_manual(values = palette)
  map$set_zoom(zoom)
  map
}

#Entire country
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes for Parties",
  zoom_lower_48,
  c("lightpink", "deepskyblue", "yellow", "navy", "grey", "red", "green")
)
shift_US <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_US

#Break down county vote into regions of the US for easier viewing

#New England Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - New England",
  zoom_new_england,
  c("yellow", "navy", "red", "green")
)
shift_NE <- c$render() + theme(legend.position = "right")
shift_NE

##Mid-Atlantic Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - Mid-Atlantic",
  zoom_mid_atlantic,
  c("yellow", "navy", "red", "green")
)
shift_MA <- c$render() + theme(legend.position = "right")
shift_MA

##South East Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - South East",
  zoom_south_east,
  c("lightpink", "yellow", "navy", "red", "green")
)
shift_SE <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_SE

##Mid West Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - Mid West",
  zoom_mid_west,
  c("yellow", "navy", "red", "green")
)
shift_MW <- c$render() + theme(legend.position = "right")
shift_MW

##South West Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - South West",
  zoom_south_west,
  c("lightpink", "yellow", "navy", "red", "green")
)
shift_SW <- c$render() + theme(legend.position = "right")
shift_SW

##West Region
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - West",
  zoom_west,
  c("lightpink", "yellow", "navy", "grey", "red", "green")
)
shift_W <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
shift_W

#Explore Vote Count by Swing States
c <- vote_shift_map(
  "Shift from 2012 to 2016 by County Total Votes - Swing States",
  zoom_swing,
  c("yellow", "navy", "lightpink", "red", "green")
)
shift_swing <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_swing

Vote Share at State Level

#store the distinct state fips values (column 18 of votes), in order of
#first appearance. unique() scans every row, so the result no longer depends
#on the value a previous loop happened to leave in `i`.
state_levels <- unique(votes[, 18])

#start the state table: one row per state fips, keyed as "fips.numeric" so it
#can be joined to state.regions, plus eight zero-filled accumulator columns
states <- data.frame(fips.numeric = state_levels)
states[, 2:9] <- 0

#load state.regions, join on the fips key, keep the first 11 columns
#(dropping the last joined column) and give every column its final name
data(state.regions)
states <- merge(x = states, y = state.regions, by = "fips.numeric")
states <- states[, seq_len(11)]
names(states) <- c("fips",
                   "dem_votes_2016", "gop_votes_2016", "total_votes_2016",
                   "dem_votes_2012", "gop_votes_2012", "total_votes_2012",
                   "pop_2010", "pop_2014",
                   "region", "abb")

# Roll the county rows of `votes` up to state level.
# A county belongs to a state when votes column 12 equals the state's `abb`
# (states column 11). `vote_cols` lists, in order, the votes columns that are
# added into states columns 2..9 (dem/gop/total 2016, dem/gop/total 2012,
# pop_2010, pop_2014).
# One column sum per state replaces a scalar update for every
# county x state pair.
vote_cols <- c(5, 6, 7, 15, 16, 14, 27, 26)
county_state <- as.character(votes[, 12])

for (j in seq_len(nrow(states))) {
  # which() drops rows whose state is NA rather than selecting them
  county_rows <- which(county_state == as.character(states[j, 11]))
  state_sums <- colSums(votes[county_rows, vote_cols, drop = FALSE])
  states[j, 2:9] <- unlist(states[j, 2:9]) + state_sums
}

#Plot Voter Participation Changes
# Change in the number of ballots cast per state, 2016 minus 2012.
states$participation_change <- states$total_votes_2016 - states$total_votes_2012

# choroplethr expects exactly a `region` and a `value` column
# (columns 10 and 12 are region and participation_change).
voting_num <- states[, c(10, 12)]
colnames(voting_num) <- c("region", "value")

# The data are one row per state, so the title says "State", not "County".
ggplot(voting_num, aes(x = "distribution", y = value)) +
  geom_boxplot(fill = "firebrick", colour = "darkblue") +
  ggtitle("State Shifts in Total Votes Cast from 2012 to 2016") +
  ylab("Total Number of Votes Cast Change")

c <- StateChoropleth$new(voting_num)
c$title <- "Shift from 2012 to 2016 by Total Number of Votes Cast"
c$add_state_outline <- TRUE
c$legend <- "Change in Votes Cast"
c$set_num_colors(1)
vote_US <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
vote_US

# Estimate the 2012 and 2016 populations by assuming the 2010 -> 2014 change
# was spread evenly over those four years: 2012 is two years after 2010 and
# 2016 is two years after 2014. (Adding the full four-year change to 2010
# would simply reproduce pop_2014, and adding it to 2014 would project four
# years ahead instead of two.)
annual_growth <- (states$pop_2014 - states$pop_2010) / 4

states$pop_2012 <- states$pop_2010 + 2 * annual_growth

states$pop_2016 <- states$pop_2014 + 2 * annual_growth

# Change in votes cast per estimated resident, in percentage points.
states$per_vot_change <- 100 * ((states$total_votes_2016 / states$pop_2016) -
                                (states$total_votes_2012 / states$pop_2012))

# Columns 10 and 15 are region and per_vot_change.
per_vote_num <- states[, c(10, 15)]
colnames(per_vote_num) <- c("region", "value")


c <- StateChoropleth$new(per_vote_num)
c$title <- "Shift in Turnout from 2012 to 2016 by Estimated Population Size"
c$add_state_outline <- TRUE
c$legend <- "% Change in Votes Cast"
c$set_num_colors(8)
c$ggplot_scale <- scale_fill_manual(
  values = c("navy", "blue", "lightblue", "white",
             "lightpink", "red", "firebrick", "darkred")
)
per_vote_US <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
per_vote_US

#Results
# Vote shares are fractions of all votes cast; `third` and `DT_margin` are
# expressed in percentage points.
states$DT_per <- states$gop_votes_2016 / states$total_votes_2016

states$HC_per <- states$dem_votes_2016 / states$total_votes_2016

states$third <- (1 - states$DT_per - states$HC_per) * 100

states$DT_margin <- (states$DT_per - states$HC_per) * 100

ggplot(states, aes(x = "distribution", y = DT_margin)) +
  geom_boxplot(fill = "firebrick", colour = "darkblue") +
  ggtitle("% Donald Trump Won Per State") +
  ylab("Donald Trump %")

# Turn a margin in percentage points (positive = Trump ahead, negative =
# Clinton ahead) into a legend label such as "Trump (2% - 5%)".
# Bands by absolute margin: below 2, 2 up to (not including) 5, 5 up to and
# including 10, above 10 - so margins of exactly 2, 5 or 10 are labelled
# instead of being left NA. A margin of exactly 0 (or NA) has no winner and
# stays NA.
label_margin <- function(margin) {
  gap <- abs(margin)
  band <- ifelse(gap < 2, "(<2%)",
          ifelse(gap < 5, "(2% - 5%)",
          ifelse(gap <= 10, "(5% - 10%)", "(>10%)")))
  ifelse(margin > 0, paste("Trump", band),
         ifelse(margin < 0, paste("Clinton", band), NA_character_))
}

states$winner <- label_margin(states$DT_margin)

state_winner <- states[, c("region", "winner")]
colnames(state_winner) <- c("region", "value")

c <- StateChoropleth$new(state_winner)
c$title <- "2016 State Winner Margin"
c$add_state_outline <- TRUE
c$legend <- "Win Margin %"
c$set_num_colors(8)
c$ggplot_scale <- scale_fill_manual(
  values = c("lightblue", "navy", "dodgerblue1", "blue",
             "lightpink", "firebrick", "red", "darkred")
)
winner_US <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
winner_US

#Third Party Vote
ggplot(states, aes(x = "distribution", y = third)) +
  geom_boxplot(fill = "firebrick", colour = "darkblue") +
  ggtitle("% Third Party Per State") +
  ylab("Third Party Voting Percent per State")

# Columns 10 and 18 are region and third (third-party share in percent).
state_third <- states[, c(10, 18)]

# Bin the share into the six legend categories. Bins are closed on the left
# ([3, 5), [5, 7), ...), so a share sitting exactly on a break (3, 5, 7, 10
# or 15) is placed in the bin that starts there instead of being left NA.
# The result is stored as plain text, not as a factor.
state_third$category <- as.character(cut(
  state_third[, 2],
  breaks = c(-Inf, 3, 5, 7, 10, 15, Inf),
  labels = c("< 3%", "3% - 5%", "5% - 7%", "7% - 10%", "10% - 15%", "> 15%"),
  right = FALSE
))

colnames(state_third) <- c("region", "third", "value")

c <- StateChoropleth$new(state_third)
c$title <- "2016 Third Party Vote by State"
c$add_state_outline <- TRUE
c$legend <- "Third Party %"
c$set_num_colors(6)
c$ggplot_scale <- scale_fill_manual(
  values = c("purple", "red", "orange", "blue", "green", "yellow")
)
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
state_third_US <- c$render() +
  theme(legend.position = "right")
state_third_US

Examine County Level Winner

# Columns 1, 8 and 9 of votes: the county id plus the columns that are read
# below by name as `Clinton` and `Trump`.
county_winner <- votes[, c(1, 8, 9)]

# Trump minus Clinton, scaled by 100.
# NOTE(review): presumably Trump/Clinton are vote-share fractions, making
# this a margin in percentage points - confirm.
county_winner$Trump_margin <- (county_winner$Trump - county_winner$Clinton) * 100

# Turn a margin (positive = Trump ahead, negative = Clinton ahead) into a
# legend label such as "Clinton (5% - 10%)".
# Bands by absolute margin: below 2, 2 up to (not including) 5, 5 up to and
# including 10, above 10 - so margins of exactly 2, 5 or 10 are labelled
# instead of being left NA. A margin of exactly 0 (or NA) has no winner and
# stays NA.
label_margin <- function(margin) {
  gap <- abs(margin)
  band <- ifelse(gap < 2, "(<2%)",
          ifelse(gap < 5, "(2% - 5%)",
          ifelse(gap <= 10, "(5% - 10%)", "(>10%)")))
  ifelse(margin > 0, paste("Trump", band),
         ifelse(margin < 0, paste("Clinton", band), NA_character_))
}

# Column 5 holds the label; choroplethr needs it to be called `value`.
county_winner[, 5] <- label_margin(county_winner$Trump_margin)

colnames(county_winner)[1] <- "region"
colnames(county_winner)[5] <- "value"

plot(density(county_winner[, 4]),
     main = "Trump County Margin Density Plot",
     ylab = "density")

# County maps of the winner-margin category in `county_winner`.
# The maps share outline, legend and number of colours; title and zoom vary,
# and one map uses a shorter palette.

# State groups used for zooming.
zoom_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                      "connecticut", "rhode island")
zoom_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                       "delaware")
zoom_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                     "north carolina", "south carolina", "georgia", "florida",
                     "arkansas", "mississippi", "alabama", "louisiana")
zoom_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                   "minnesota", "iowa", "missouri", "north dakota",
                   "south dakota", "nebraska", "kansas")
zoom_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
zoom_west <- c("colorado", "wyoming", "montana", "idaho", "utah", "nevada",
               "california", "oregon", "washington", "alaska", "hawaii")
# The country-wide map shows every group above except Alaska and Hawaii.
zoom_lower_48 <- c(zoom_new_england, zoom_mid_atlantic, zoom_south_east,
                   zoom_mid_west, zoom_south_west,
                   setdiff(zoom_west, c("alaska", "hawaii")))
zoom_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                "north carolina", "florida", "arizona", "iowa", "nevada",
                "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Eight-colour palette used by every map except South West.
margin_palette <- c("lightblue", "navy", "dodgerblue1", "blue",
                    "lightpink", "darkred", "red", "firebrick")

# Configure (without rendering) a county choropleth of `county_winner`.
#   title:   plot title
#   zoom:    character vector of state names passed to set_zoom()
#   palette: fill colours for the manual scale
# Returns the configured CountyChoropleth object.
county_margin_map <- function(title, zoom, palette = margin_palette) {
  map <- CountyChoropleth$new(county_winner)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- "County Winner Margin"
  map$set_num_colors(8)
  map$ggplot_scale <- scale_fill_manual(values = palette)
  map$set_zoom(zoom)
  map
}

c <- county_margin_map("County Winner Margin %", zoom_lower_48)
county_US <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_US

#Break Down By Region for Easy Viewing

#New England
c <- county_margin_map("County Winner Margin % - New England",
                       zoom_new_england)
county_NE <- c$render() + theme(legend.position = "right")
county_NE

##Mid-Atlantic Region
c <- county_margin_map("County Winner Margin % - Mid-Atlantic",
                       zoom_mid_atlantic)
county_MA <- c$render() + theme(legend.position = "right")
county_MA

##South East Region
c <- county_margin_map("County Winner Margin % - South East",
                       zoom_south_east)
county_SE <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_SE

##Mid West Region
c <- county_margin_map("County Winner Margin % - Mid West", zoom_mid_west)
county_MW <- c$render() + theme(legend.position = "right")
county_MW

##South West Region
# NOTE(review): this palette has seven colours ("lightblue" is left out)
# while the other maps use eight - presumably one category does not occur in
# these states; confirm.
c <- county_margin_map(
  "County Winner Margin % - South West",
  zoom_south_west,
  palette = c("navy", "dodgerblue1", "blue",
              "lightpink", "darkred", "red", "firebrick")
)
county_SW <- c$render() + theme(legend.position = "right")
county_SW

##West Region
c <- county_margin_map("County Winner Margin % - West", zoom_west)
county_W <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
county_W

#Explore Vote Count by Swing States
c <- county_margin_map("County Winner Margin % - Swing States", zoom_swing)
county_swing <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_swing

Analyze Counties that Flipped 2012 to 2016

# Classify every county by whether its leading party changed between the two
# elections. A county is "OBAMA to TRUMP" when column 9 exceeds column 8 and
# column 19 exceeds column 20, "ROMNEY to CLINTON" when both comparisons go
# the other way, and "Solid County" otherwise (ties included).
# NOTE(review): presumably columns 8/9 are the Clinton/Trump figures and
# columns 19/20 the Obama/Romney figures - confirm against the votes schema.
flip_label <- character(nrow(votes))

for (i in seq_len(nrow(votes))) {
  if (votes[i, 9] > votes[i, 8] && votes[i, 19] > votes[i, 20]) {
    flip_label[i] <- "OBAMA to TRUMP"
  } else if (votes[i, 9] < votes[i, 8] && votes[i, 19] < votes[i, 20]) {
    flip_label[i] <- "ROMNEY to CLINTON"
  } else {
    flip_label[i] <- "Solid County"
  }
}

# The result is written and read back by column name, so it no longer relies
# on `flips` being the 99th column of votes.
votes$flips <- flip_label

flips <- votes[, c(1, match("flips", names(votes)))]
colnames(flips) <- c("region", "value")

c <- CountyChoropleth$new(flips)
c$title <- "County Flips from 2012 to 2016"
c$add_state_outline <- TRUE
c$legend <- "County Status"
c$set_num_colors(3)
c$ggplot_scale <- scale_fill_manual(values = c("red", "blue", "white"))
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
county_flips <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_flips

#Number of Clinton Flips
length(which(votes$flips == "ROMNEY to CLINTON"))
## [1] 20
#Number of Trump Flips
length(which(votes$flips == "OBAMA to TRUMP"))
## [1] 218
#Number of counties that did not change
length(which(votes$flips == "Solid County"))
## [1] 2874

Exploratory Analysis - Religion

Explore religious variables

#total religious population
# Columns 1 and 83 of votes, renamed to the region/value pair choroplethr
# expects. No manual scale is set, so the choropleth's default colours apply.
religious <- votes[, c(1, 83)]
names(religious) <- c("region", "value")

c <- CountyChoropleth$new(religious)
c$title <- "Total Religious Population"
c$add_state_outline <- TRUE
c$legend <- "Religious Percentage"
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
county_religious <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_religious

#Evangelical Population
# Columns 1 and 84 of votes, renamed to the region/value pair choroplethr
# expects.
evangelical <- votes[, c(1, 84)]
colnames(evangelical) <- c("region", "value")
# Bin the values into five classes. include.lowest = TRUE closes the first
# bin at 0, so a county whose value is exactly 0 lands in the lowest class
# instead of being turned into NA by cut().
evangelical$value <- cut(evangelical$value,
                         breaks = c(0, 1, 5, 10, 20, Inf),
                         include.lowest = TRUE)

c <- CountyChoropleth$new(evangelical)
c$title <- "Evangelical Population"
c$add_state_outline <- TRUE
c$set_num_colors(5)
c$ggplot_scale <- scale_fill_manual(
  values = c("white", "darkseagreen1", "greenyellow", "green", "darkgreen")
)
c$legend <- "Evangelical Percentage"
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
county_evangelical <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 31007, 49009, 46075, 46117, 48301, 38087, 49033, 31005,
## 48269, 51515, 48109, 31165, 32029, 30103, 38085, 30039, 48261, 31113, 8014,
## 32009, 31085, 31117, 8047, 49029, 8023, 16071, 16025, 16033, 16041
county_evangelical

#Catholic Population
# Columns 1 and 87 of votes, renamed to the region/value pair choroplethr
# expects.
catholic <- votes[, c(1, 87)]
colnames(catholic) <- c("region", "value")
# Bin the values into five classes. include.lowest = TRUE closes the first
# bin at 0, so a county whose value is exactly 0 lands in the lowest class
# instead of being turned into NA by cut().
catholic$value <- cut(catholic$value,
                      breaks = c(0, 1, 5, 10, 20, Inf),
                      include.lowest = TRUE)

c <- CountyChoropleth$new(catholic)
c$title <- "Catholic Population"
c$add_state_outline <- TRUE
c$set_num_colors(5)
c$ggplot_scale <- scale_fill_manual(
  values = c("white", "thistle1", "plum3", "purple", "purple4")
)
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
c$legend <- "Catholic Percentage"
county_catholic <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 28009, 28015, 28021, 28031, 13239, 47025, 47033, 47061,
## 31169, 39105, 47067, 47087, 47095, 47135, 47137, 40029, 48045, 48081,
## 38027, 38039, 18115, 40085, 40099, 40107, 30071, 48403, 48417, 48431,
## 30109, 31007, 35021, 47175, 45061, 31115, 28101, 28119, 28129, 40149,
## 29017, 48447, 49001, 29061, 29067, 29079, 37011, 42023, 17169, 41049,
## 29129, 37095, 37111, 37143, 13125, 49009, 49023, 48033, 29175, 29181,
## 29197, 29203, 13169, 45017, 46063, 46075, 13197, 13201, 13195, 13205,
## 13209, 13213, 13221, 13229, 13231, 13235, 13269, 20187, 20189, 20207,
## 21201, 13243, 22091, 13253, 13265, 13281, 13289, 13295, 13301, 48301,
## 20049, 56017, 56019, 54027, 54059, 56023, 51015, 51025, 51036, 51049,
## 51069, 51081, 51091, 51103, 53059, 54017, 54043, 21007, 21061, 21069,
## 21131, 38087, 49033, 49055, 1041, 1057, 1129, 21165, 51079, 12129, 13011,
## 13019, 13027, 21187, 13033, 13061, 5149, 28023, 1019, 13007, 13065, 28069,
## 22013, 47127, 8079, 47133, 47159, 45069, 46077, 51097, 1035, 1063, 1075,
## 12077, 29185, 30021, 40061, 40069, 21031, 5109, 5117, 13307, 13315, 32011,
## 21063, 32027, 21087, 13105, 21103, 31005, 49017, 48059, 21137, 48075, 1105,
## 1111, 17087, 17151, 48119, 48269, 18005, 13143, 13183, 13193, 13207, 13219,
## 13237, 13249, 37007, 37033, 37073, 51133, 51179, 51515, 13259, 13283,
## 20025, 20033, 37179, 1133, 21203, 21235, 22025, 22083, 48101, 48111, 48125,
## 48345, 48351, 51163, 51181, 51183, 48197, 48237, 51640, 51685, 51750,
## 53069, 8113, 18171, 20129, 5021, 21119, 21177, 19051, 19185, 37079, 20021,
## 28055, 28111, 28125, 28131, 47023, 40067, 40105, 40151, 28037, 40129,
## 28061, 31165, 31175, 28161, 29005, 29025, 32015, 30103, 38085, 39163,
## 47073, 47097, 47121, 48407, 48495, 29199, 31015, 31021, 24019, 29063,
## 37015, 37029, 40001, 40007, 40041, 47171, 29227, 37177, 31103, 37131, 5075,
## 5077, 16007, 26083, 18155, 13167, 13177, 13287, 54063, 51071, 51077, 51089,
## 51159, 51175, 51570, 51735, 47169, 5049, 5127, 47015, 38083, 13251, 13263,
## 13273, 21223, 21089, 51021, 51027, 51037, 29211, 8014, 30037, 31137, 31171,
## 8025, 31183, 32009, 32033, 30069, 30107, 31009, 31073, 31085, 31097, 31117,
## 28103, 38007, 12125, 13001, 45005, 45009, 47007, 47057, 13055, 47081,
## 47173, 45049, 46095, 13079, 41069, 48393, 48433, 48009, 48011, 48079,
## 48095, 8053, 48159, 8057, 48247, 48263, 48349, 51530, 8111, 54013, 49029,
## 49031, 51007, 51017, 51063, 51075, 51111, 1029, 1037, 1059, 1067, 1085,
## 5147, 6003, 1119, 1131, 1007, 1011, 6049, 1065, 13149, 13093, 13101, 13119,
## 8061, 12089, 13003, 13005, 13023, 13025, 13037, 13053, 12041, 1079, 8081,
## 8103, 5025, 5073, 5081, 5099, 13321, 5013, 16071, 16077, 5101, 5111, 17047,
## 5129, 16025, 16033, 16041, 13309, 16065, 13313, 13141, 13155, 13159, 13171,
## 13319, 20099, 22081, 21139, 21153, 21159, 13181, 19075, 16051, 20017, 13211
county_catholic

#Mormon Population
# County choropleth of the Mormon column of `votes`, binned into five classes.
#
# CountyChoropleth expects a two-column frame named (region, value). The region
# comes from column 1 of `votes`; the value column is looked up by name rather
# than by its former hard-coded position (89), so the map no longer depends on
# how many columns precede it.
mormon <- votes[, c(1, match("Mormon", names(votes)))]
colnames(mormon) <- c("region", "value")

# cut() builds right-closed intervals by default, so a value of exactly 0 lies
# outside (0,1] and turns into NA -- the county is then drawn as "missing".
# include.lowest = TRUE closes the first bin at 0 so those counties are kept in
# the lowest class (its label becomes "[0,1]").
mormon$value <- cut(
  mormon$value,
  breaks = c(0, 1, 5, 10, 20, Inf),
  include.lowest = TRUE
)

# `c` is the scratch name this file reuses for each choropleth object.
c <- CountyChoropleth$new(mormon)
c$title <- "Mormon Population"
c$add_state_outline <- TRUE
c$legend <- "Mormon Percentage"
c$set_num_colors(5)
# One fill colour per bin, in ascending bin order.
c$ggplot_scale <- scale_fill_manual(
  values = c("white", "lightpink", "lightpink3", "firebrick1", "firebrick4")
)
# Restrict the map to the 48 states listed (Alaska and Hawaii are not included).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_mormon <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 1001, 27097, 27099, 27111, 27119, 27129, 27133, 19059, 27141,
## 27149, 27157, 27163, 28005, 17005, 28009, 28011, 28015, 28021, 19149,
## 28045, 28051, 28053, 17009, 28067, 28073, 13239, 27077, 45031, 45035,
## 45037, 47017, 47021, 47025, 47033, 47041, 47047, 23015, 28041, 31149,
## 31151, 17035, 31169, 31181, 32005, 32013, 39105, 39107, 39117, 39125,
## 39129, 39139, 39149, 39161, 47067, 47071, 47077, 47083, 47087, 17065,
## 47095, 47101, 47107, 47109, 47111, 47119, 47135, 47137, 1107, 18051, 18059,
## 33019, 39167, 39175, 40003, 40011, 40025, 40029, 40035, 40045, 40055,
## 48045, 48055, 48065, 48073, 48077, 48081, 48087, 24047, 25007, 25015,
## 34033, 34041, 30055, 37195, 37199, 38005, 38009, 38027, 38029, 38037,
## 38039, 38043, 38047, 38053, 18115, 38059, 38063, 40085, 40091, 40099,
## 18125, 40101, 40107, 26003, 18131, 30091, 30097, 1121, 18139, 38065, 38067,
## 38071, 38075, 38081, 38089, 38093, 38097, 39001, 18153, 39011, 39013,
## 39019, 48369, 18173, 48385, 48387, 48399, 48403, 48417, 48419, 48427,
## 18179, 48431, 26019, 26027, 30109, 31007, 31011, 31013, 31019, 31025,
## 31029, 31039, 31051, 31057, 31071, 19009, 39033, 35019, 35021, 35025,
## 35033, 47143, 47147, 47155, 47161, 47167, 19021, 47175, 45055, 45061,
## 45065, 45071, 45073, 26041, 19031, 17131, 13191, 31079, 31083, 31087,
## 31099, 19039, 31101, 31105, 31115, 31119, 28097, 28101, 28119, 28129,
## 36013, 40137, 40145, 17079, 40149, 41013, 41021, 26087, 26093, 26095,
## 28141, 28143, 17103, 28155, 28157, 28163, 29017, 29023, 29035, 29041,
## 29045, 17107, 36041, 36051, 36073, 48435, 48437, 48447, 48483, 17129,
## 48489, 26071, 26131, 26135, 17139, 29051, 29055, 29057, 29067, 29073,
## 17145, 29087, 17155, 36093, 36099, 42023, 17169, 45089, 46033, 46039,
## 46045, 46051, 17175, 46059, 26159, 27011, 27017, 27025, 27041, 27043,
## 27059, 37037, 17189, 37059, 37069, 37075, 37077, 17203, 42029, 42049,
## 42067, 42073, 27063, 27065, 27069, 29111, 29125, 29129, 29139, 29141,
## 29153, 29157, 37095, 37115, 18033, 37121, 37139, 37143, 37153, 13125,
## 42109, 42119, 13129, 48017, 48023, 48033, 13131, 48035, 29171, 29186,
## 29197, 29207, 37169, 39071, 39077, 44001, 44003, 45001, 45017, 45023,
## 46063, 46065, 46069, 46075, 46079, 46085, 46097, 46101, 46115, 46117,
## 46127, 30005, 30019, 31129, 13197, 31133, 19015, 19019, 19023, 19027,
## 13201, 17091, 17093, 17099, 17101, 17117, 17125, 17127, 13189, 13195,
## 13199, 13205, 13209, 13213, 13221, 13229, 13231, 13225, 13235, 13241,
## 13247, 13257, 13267, 13269, 20183, 20187, 20197, 20207, 21191, 21201,
## 21205, 21207, 21221, 13243, 21233, 21237, 21239, 22005, 22091, 22093,
## 22099, 22113, 13253, 21013, 21015, 21027, 17137, 13265, 17147, 17153,
## 17191, 13279, 13281, 13289, 13301, 20041, 20047, 13285, 20053, 20063,
## 20073, 20077, 20087, 22023, 22031, 22035, 22041, 22043, 13303, 21039,
## 21041, 21057, 21065, 21079, 21085, 20027, 21091, 21097, 48267, 48271,
## 48281, 48285, 48287, 48291, 48293, 48295, 48301, 48305, 48317, 48327,
## 48331, 20039, 48333, 51810, 51840, 53003, 53017, 20049, 53023, 51600,
## 55107, 55111, 55119, 55125, 20065, 56017, 56019, 54025, 54033, 20075,
## 54045, 54051, 54053, 54059, 54065, 54069, 54075, 20085, 54087, 54091,
## 54093, 54099, 54103, 54105, 55011, 55013, 20095, 55019, 55021, 55023,
## 50003, 20105, 50011, 51005, 51011, 51019, 20115, 51031, 51033, 51036,
## 51045, 51049, 51051, 54081, 20127, 51069, 51081, 51091, 51095, 51103,
## 51119, 50025, 20135, 54017, 54029, 54043, 54055, 54073, 20145, 54095,
## 51115, 22111, 21001, 21005, 21007, 21019, 21037, 21049, 21061, 21069,
## 21081, 21109, 21125, 21131, 40037, 20163, 37187, 38003, 38013, 38025,
## 38041, 38055, 38069, 38087, 38095, 39021, 54109, 55027, 50013, 20185,
## 51009, 51023, 51035, 51053, 51065, 1027, 20195, 1115, 1129, 21165, 21181,
## 19043, 20203, 19073, 19081, 19091, 19115, 19131, 19147, 19157, 20205,
## 19165, 19187, 19197, 35059, 36077, 51079, 12129, 13011, 21187, 13033,
## 13039, 13061, 21189, 6091, 8011, 8019, 20023, 27071, 27079, 27087, 27101,
## 27107, 27125, 27131, 27143, 27159, 27169, 28007, 28035, 28043, 36113,
## 42025, 42053, 21219, 42075, 42099, 1013, 1019, 21229, 8049, 12121, 12133,
## 22001, 13007, 13035, 13047, 13065, 13075, 28057, 28069, 22119, 22125,
## 22013, 25003, 45033, 47051, 47075, 47099, 47127, 13085, 13099, 13117, 8055,
## 8065, 8079, 8089, 12007, 22077, 26059, 26113, 26119, 27013, 27023, 27033,
## 47133, 47159, 47165, 22105, 45059, 45069, 45087, 46025, 46037, 46049,
## 46057, 46067, 46077, 51097, 51109, 1035, 1039, 1053, 1063, 21003, 1075,
## 12047, 12067, 12077, 5011, 5037, 5055, 5065, 5079, 29115, 29121, 29163,
## 29173, 29185, 29223, 46091, 21025, 46107, 46119, 46129, 40057, 40061,
## 40069, 40075, 40093, 40103, 5141, 21031, 40133, 40141, 5117, 5135, 13307,
## 13311, 13315, 21053, 17017, 31123, 31127, 31143, 31167, 31179, 31185,
## 32011, 21063, 32027, 33017, 21077, 48363, 48373, 48405, 48413, 48421,
## 48455, 13083, 21087, 13111, 8071, 5095, 17025, 17039, 21099, 17061, 17069,
## 18049, 18075, 18093, 18113, 21103, 18119, 30049, 30075, 31001, 31005,
## 31017, 31027, 31041, 31059, 31069, 31077, 48493, 21121, 47183, 48019,
## 48059, 5145, 21137, 48075, 48089, 1105, 1111, 21143, 19003, 19025, 19035,
## 17071, 17075, 17087, 17123, 17151, 17199, 31107, 28077, 28081, 21157,
## 28107, 28123, 28133, 28145, 28159, 29003, 29039, 29059, 48107, 48119,
## 48131, 48143, 48153, 48163, 21175, 48173, 48175, 48219, 48255, 48269,
## 48279, 48289, 8033, 19047, 18027, 13127, 13133, 13163, 13183, 13193, 13207,
## 13219, 13227, 13237, 13249, 29083, 29093, 19063, 37003, 37007, 37009,
## 37073, 37085, 19067, 37123, 48299, 48335, 48343, 51127, 19079, 51515,
## 51620, 51680, 51720, 51740, 13259, 13271, 13283, 20025, 20031, 20033,
## 19095, 20069, 20081, 20093, 20101, 20111, 20123, 20133, 20139, 20151,
## 19105, 20159, 37157, 37173, 39069, 39115, 39127, 39137, 39165, 40005,
## 19121, 55053, 55067, 55078, 55099, 55121, 55135, 20179, 21197, 19137,
## 21203, 21215, 22007, 22021, 22025, 22047, 22059, 22067, 22083, 48101,
## 48105, 48109, 48111, 48125, 48133, 19151, 48145, 48151, 48193, 48345,
## 48351, 19161, 51137, 51139, 51169, 51171, 51181, 55035, 55037, 55041,
## 55043, 55047, 55049, 55057, 53043, 48197, 19181, 48211, 48235, 48237,
## 48239, 48243, 19195, 48253, 48259, 51191, 51195, 51520, 51610, 20005,
## 51640, 51678, 51683, 51685, 51710, 51750, 51790, 55075, 20015, 55077,
## 55083, 55091, 55093, 55097, 53069, 27073, 54005, 54009, 54011, 54015,
## 54019, 8093, 8107, 8113, 8115, 5125, 18171, 27085, 19005, 17059, 27093,
## 20129, 20137, 20153, 20175, 22065, 5021, 21119, 21127, 21133, 21149, 21155,
## 19051, 19065, 27113, 19071, 19083, 19097, 19109, 18111, 19119, 19133,
## 19143, 19167, 27121, 19173, 19185, 19191, 20001, 20007, 22071, 27075,
## 27123, 27127, 27151, 27155, 27165, 27167, 27173, 28019, 28027, 27147,
## 19117, 37053, 37079, 20021, 28055, 28063, 27153, 22087, 38021, 38023,
## 38033, 38049, 28105, 27161, 28111, 28113, 28125, 35051, 42043, 28003,
## 45029, 6043, 28013, 47023, 38051, 40067, 40077, 40089, 40095, 40105, 28017,
## 31139, 19037, 17083, 17085, 28037, 47055, 23025, 40129, 26001, 41025,
## 26089, 26105, 28147, 28061, 47029, 31165, 31175, 26011, 28065, 30079,
## 26109, 28149, 28161, 29005, 29025, 39095, 45011, 22121, 32029, 39111,
## 39123, 30105, 38085, 38099, 29031, 41055, 23009, 41063, 29113, 39163,
## 23029, 47089, 47091, 47097, 47115, 47121, 47123, 47131, 48377, 48391,
## 48407, 36079, 48443, 48461, 48473, 48479, 48495, 48501, 48007, 29199,
## 26013, 31015, 31021, 26153, 46071, 46081, 46089, 46105, 46109, 34001,
## 24029, 31043, 31049, 31061, 29069, 29081, 29089, 37015, 37023, 37029,
## 29133, 29143, 29149, 29155, 37113, 24039, 40001, 40007, 40021, 40023,
## 40033, 37189, 48051, 24041, 47153, 47163, 47171, 47181, 36097, 36105,
## 36115, 46009, 46021, 29209, 26009, 29227, 37177, 39067, 39073, 48071,
## 26015, 45077, 26053, 46047, 26023, 46053, 27007, 27015, 27021, 27027,
## 46135, 30025, 30033, 30045, 25005, 25019, 26037, 31091, 31095, 31103,
## 28093, 27031, 27049, 27055, 37131, 37137, 37149, 42103, 34025, 26057,
## 35005, 38001, 5041, 5057, 5075, 5077, 17013, 17021, 17027, 18023, 26083,
## 5105, 5107, 18123, 18147, 18155, 18163, 26085, 18039, 13145, 13167, 13177,
## 13179, 17161, 17171, 17173, 17181, 17193, 26097, 13287, 13291, 21105,
## 48311, 48315, 48337, 54071, 51077, 51083, 51089, 51093, 51105, 51117,
## 48261, 48155, 48161, 51131, 51149, 51159, 1021, 26117, 51165, 51175, 51177,
## 550
# Auto-print the stored Mormon map object so the plot is drawn.
county_mormon

#Jewish Population
# County choropleth of the Jewish column of `votes`, binned into five classes.
#
# CountyChoropleth expects a two-column frame named (region, value). The region
# comes from column 1 of `votes`; the value column is looked up by name rather
# than by its former hard-coded position (88), so the map no longer depends on
# how many columns precede it.
jewish <- votes[, c(1, match("Jewish", names(votes)))]
colnames(jewish) <- c("region", "value")

# cut() builds right-closed intervals by default, so a value of exactly 0 lies
# outside (0,1] and turns into NA -- the county is then drawn as "missing".
# include.lowest = TRUE closes the first bin at 0 so those counties are kept in
# the lowest class (its label becomes "[0,1]").
jewish$value <- cut(
  jewish$value,
  breaks = c(0, 1, 2, 5, 10, Inf),
  include.lowest = TRUE
)

# `c` is the scratch name this file reuses for each choropleth object.
c <- CountyChoropleth$new(jewish)
c$title <- "Jewish Population"
c$add_state_outline <- TRUE
c$legend <- "Jewish Percentage"
c$set_num_colors(5)
# One fill colour per bin, in ascending bin order.
c$ggplot_scale <- scale_fill_manual(
  values = c("white", "cyan", "cyan3", "blue", "darkblue")
)
# Restrict the map to the 48 states listed (Alaska and Hawaii are not included).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_jewish <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 1001, 1009, 1099, 16067, 27091, 27095, 27097, 27099, 27105,
## 27111, 27115, 27117, 27119, 16081, 27129, 27133, 19059, 27141, 27145,
## 27149, 27157, 27163, 27171, 28005, 17005, 28009, 28015, 28021, 28031,
## 28039, 19149, 28045, 28051, 28053, 17009, 28059, 28067, 28073, 23007,
## 13239, 27077, 45031, 45035, 45037, 47011, 47017, 47021, 47025, 47033,
## 47035, 47041, 47047, 47049, 47059, 17029, 47061, 23015, 23021, 23023,
## 23031, 28041, 31149, 31151, 31155, 17035, 31161, 31169, 31173, 31181,
## 32005, 32007, 32013, 32017, 32023, 17049, 39105, 39107, 39117, 39125,
## 39129, 39133, 39135, 39141, 17055, 39149, 39157, 39161, 47067, 47071,
## 47077, 47083, 47087, 17065, 47095, 47101, 47107, 47109, 47111, 47119,
## 47125, 47135, 47137, 24011, 1107, 18051, 24021, 24023, 32510, 33003, 33009,
## 18059, 33019, 39167, 39171, 39175, 40003, 40011, 18069, 40013, 40017,
## 40025, 40029, 40035, 40039, 40045, 40049, 40055, 37193, 18079, 47141,
## 48045, 48049, 48055, 48065, 48067, 48073, 48077, 48081, 18085, 48087,
## 48091, 18095, 34033, 35007, 30055, 30057, 37195, 37199, 38005, 38009,
## 38027, 38029, 38037, 38039, 38043, 38047, 38053, 18115, 38059, 38063,
## 47001, 47003, 40065, 40083, 40085, 40091, 40099, 18125, 40107, 40111,
## 40117, 40119, 40125, 26003, 26005, 18131, 26007, 30063, 30065, 30071,
## 30077, 30083, 30087, 30091, 30097, 30101, 1121, 18139, 38065, 38067, 38071,
## 38075, 38081, 38089, 38093, 38097, 38101, 39001, 18153, 39005, 39011,
## 39013, 39019, 48361, 48369, 48383, 18173, 48385, 48387, 48395, 48399,
## 48403, 48415, 48417, 48419, 48427, 18179, 48431, 26019, 26027, 26031,
## 26033, 30109, 31007, 31011, 18183, 31013, 31019, 31025, 31029, 31033,
## 31039, 31045, 31051, 31057, 31071, 19009, 31075, 39025, 39033, 39037,
## 39041, 35019, 19017, 35021, 35025, 35033, 35039, 47143, 47147, 47149,
## 47155, 47161, 47167, 19021, 47175, 47177, 45055, 45061, 45065, 45071,
## 45073, 26041, 26043, 19031, 26051, 26069, 17131, 13191, 31079, 31081,
## 31083, 31087, 31099, 19039, 31101, 31105, 31115, 31119, 28091, 28097,
## 28101, 28109, 28119, 17073, 28121, 28129, 35041, 35057, 35061, 17077,
## 36033, 40131, 40135, 40137, 40145, 17079, 40149, 41003, 41005, 41011,
## 41013, 41021, 41027, 26079, 26087, 26093, 26095, 26099, 26101, 28137,
## 28141, 28143, 28153, 17103, 28155, 28157, 28163, 29007, 29017, 29023,
## 29029, 29035, 29041, 29045, 17107, 36041, 36045, 36051, 36073, 36075,
## 48435, 17121, 48437, 48447, 48451, 48457, 48467, 48477, 48483, 17129,
## 48485, 48489, 48499, 49001, 49007, 26071, 26131, 26135, 26145, 17139,
## 26147, 26157, 29047, 29055, 29057, 29061, 29067, 29073, 17145, 29079,
## 29087, 37011, 37013, 37019, 37025, 13223, 36089, 17155, 36099, 36107,
## 36117, 42023, 17169, 45089, 46005, 46011, 46019, 46023, 46033, 46039,
## 46041, 46045, 46051, 17175, 46059, 26159, 27001, 27003, 27011, 27017,
## 27025, 27035, 17179, 27041, 27043, 27045, 27059, 37035, 37037, 37039,
## 37041, 17189, 37059, 37061, 37065, 37069, 37075, 37077, 37083, 37087,
## 17203, 42061, 42067, 18009, 41033, 41049, 41059, 41065, 41071, 18011,
## 27063, 27065, 27069, 29105, 29111, 29119, 29125, 29129, 29139, 29141,
## 18021, 29147, 29153, 29157, 29161, 29167, 37095, 37099, 37105, 37111,
## 37115, 18033, 37121, 37135, 37139, 37143, 37153, 13125, 42109, 42111,
## 42115, 42119, 49009, 49013, 13129, 49019, 49023, 48001, 48013, 48015,
## 48017, 48023, 48025, 48033, 13131, 48035, 48039, 29171, 29175, 29177,
## 29181, 29186, 29187, 29197, 29203, 13139, 29207, 29213, 29225, 30003,
## 37159, 37163, 37169, 37175, 13157, 39071, 39077, 39087, 13169, 45001,
## 45007, 45017, 45023, 45025, 46063, 46065, 46069, 13175, 46075, 46079,
## 46085, 46093, 46097, 46101, 46113, 46115, 46117, 46121, 13187, 46127,
## 30005, 30009, 30015, 30019, 30023, 30029, 30035, 30041, 31129, 13197,
## 31133, 31141, 19007, 19011, 19015, 19019, 19023, 19027, 19033, 17081,
## 13201, 17091, 17093, 17101, 17105, 17109, 17117, 17125, 17127, 13217,
## 13185, 13189, 13195, 13199, 13205, 13209, 13213, 13221, 13229, 13231,
## 13225, 13235, 13241, 13247, 13255, 13257, 13267, 13269, 20183, 20187,
## 20189, 13233, 20197, 20207, 21191, 21193, 21201, 21205, 21207, 21217,
## 21221, 21231, 13243, 21233, 21237, 21239, 22005, 22091, 22093, 22097,
## 22099, 22113, 13253, 22117, 21011, 21013, 21015, 21027, 21029, 17133,
## 17137, 17141, 13265, 17147, 17153, 17159, 17165, 17177, 17191, 13277,
## 18001, 13275, 13279, 13281, 13289, 13295, 13301, 20035, 20041, 20047,
## 13285, 20051, 20053, 20057, 20063, 20073, 20077, 20079, 20087, 22011,
## 22015, 13293, 22023, 22031, 22035, 22041, 22043, 22049, 21035, 13303,
## 21039, 21041, 21045, 21057, 21065, 21071, 21079, 21083, 21085, 20027,
## 21091, 21097, 21101, 21107, 48267, 48271, 48277, 48281, 48285, 48287,
## 20029, 48291, 48293, 48295, 48301, 48305, 48307, 48317, 48323, 48327,
## 48331, 20039, 48333, 51800, 53001, 53003, 53015, 53017, 53019, 20049,
## 53023, 53025, 51600, 55107, 55111, 55113, 55119, 55123, 55125, 20065,
## 56003, 56009, 56017, 56019, 54023, 54025, 54027, 54033, 20075, 54037,
## 54045, 54051, 54059, 54065, 54075, 48341, 54083, 20085, 54087, 54091,
## 54093, 54097, 54099, 54103, 54105, 55011, 55013, 20095, 55019, 55021,
## 55023, 53029, 49039, 49045, 49049, 20105, 50011, 50015, 56023, 51005,
## 51011, 51013, 51015, 51019, 20115, 51025, 51031, 51033, 51036, 51045,
## 51049, 51051, 51061, 51067, 20127, 51069, 51073, 51081, 51085, 51091,
## 51095, 51103, 49037, 51119, 20135, 53037, 53039, 53059, 53075, 54029,
## 54043, 54073, 54085, 20145, 54095, 51057, 51115, 22095, 22111, 21001,
## 21005, 21007, 21019, 21037, 20155, 21049, 21061, 21069, 21081, 21093,
## 21109, 21125, 21131, 40037, 40047, 20163, 37187, 38003, 38013, 38025,
## 38041, 38055, 38069, 38087, 38095, 39021, 54109, 55015, 55027, 49027,
## 49033, 49041, 49055, 50013, 20185, 51003, 51009, 51023, 51035, 51065,
## 1003, 1027, 1041, 1057, 20195, 1091, 1103, 1115, 1129, 21147, 21165, 21181,
## 19041, 19043, 20203, 19049, 19073, 19081, 19091, 19099, 19115, 19131,
## 19147, 19157, 20205, 19165, 19187, 19197, 39031, 39045, 35017, 35037,
## 35059, 21183, 36017, 36031, 36049, 51079, 12129, 13011, 13019, 13027,
## 21187, 13033, 13039, 13061, 5149, 6011, 6027, 6063, 21189, 6091, 8011,
## 8019, 8027, 20013, 20023, 27071, 27079, 21199, 27087, 27101, 27107, 27125,
## 27131, 27143, 27159, 27169, 28007, 28023, 21209, 28043, 36101, 42025,
## 42037, 42053, 21219, 42057, 42099, 42117, 42131, 1005, 1013, 1019, 21229,
## 6061, 8041, 8049, 12093, 12121, 12133, 22001, 13007, 13035, 13047, 13065,
## 13075, 28057, 28069, 22119, 22125, 22013, 23017, 24015, 26039, 45021,
## 45033, 47009, 47019, 47051, 47063, 47075, 47085, 1017, 22053, 47099, 47127,
## 6035, 13085, 13099, 13117, 8051, 8055, 8065, 8079, 8089, 8117, 12007,
## 12019, 22077, 12027, 26059, 26063, 26113, 26119, 26137, 26149, 27005,
## 22089, 27013, 27023, 27033, 27047, 27061, 47133, 47145, 47159, 47165,
## 22105, 45047, 45059, 45069, 45087, 46015, 46025, 46037, 46049, 46057,
## 22115, 46067, 46077, 51097, 51109, 1035, 1039, 1049, 1053, 1063, 21003,
## 1075, 1077, 12047, 12067, 12077, 4001, 4007, 5011, 5023, 21009, 5037, 5039,
## 5055, 5065, 5079, 29099, 29115, 29121, 29135, 29145, 21017, 29163, 29173,
## 29185, 29201, 29215, 29223, 30007, 30021, 46091, 21025, 46107, 46119,
## 46129, 40057, 40061, 40069, 40075, 40093, 40103, 40115, 5141, 21031, 40133,
## 40141, 41009, 5109, 5117, 5135, 21043, 13307, 13311, 13315, 16005, 16027,
## 16039, 16063, 16075, 21053, 17017, 30047, 31123, 31127, 31143, 31153,
## 31167, 31179, 31185, 32011, 21063, 32027, 41023, 41061, 42009, 21077,
## 48363, 48373, 48389, 48405, 48413, 48421, 48455, 48471, 13083, 21087,
## 13097, 13105, 13111, 8071, 5095, 17025, 17039, 21099, 17045, 17061, 17069,
## 18049, 18063, 18075, 18093, 18099, 18113, 21103, 18119, 18133, 18145,
## 35011, 30049, 30053, 30075, 30085, 31001, 21115, 31005, 31017, 31027,
## 31041, 31059, 31069, 31077, 31089, 48481, 48493, 21121, 48503, 49005,
## 49017, 47183, 47187, 48003, 48019, 48027, 48041, 48059, 5145, 21137, 48075,
## 48083, 48089, 1105, 1111, 1113, 1123, 21143, 18165, 19003, 19025, 19035,
## 17071, 17075, 17087, 21151, 17123, 17135, 17151, 17163, 17187, 17199,
## 31107, 28077, 28085, 21157, 28107, 28123, 28133, 28145, 28159, 29003,
## 29015, 29027, 29039, 21169, 29049, 29059, 29071, 48107, 48119, 48131,
## 48143, 48153, 48163, 48171, 21175, 48173, 48175, 48187, 48205, 48209,
## 48219, 48231, 48241, 48255, 19045, 48269, 48279, 48289, 8033, 8039, 8045,
## 6093, 6105,
# Auto-print the stored Jewish map object so the plot is drawn.
county_jewish

#Total Christian Population
# Combined share: sum of the Evangelical, Protestant, Catholic,
# Historically_Black and Orthodox columns. The sum is NA for any county where
# one of the five components is NA.
votes$Christian <- votes$Evangelical + votes$Protestant + votes$Catholic +
  votes$Historically_Black + votes$Orthodox

# (region, value) frame for CountyChoropleth. The column just created is looked
# up by name instead of the former hard-coded position (100), so re-running the
# chunk or adding columns upstream cannot silently map a different variable.
christian <- votes[, c(1, match("Christian", names(votes)))]
colnames(christian) <- c("region", "value")

# cut() builds right-closed intervals by default, so a county whose sum is
# exactly 0 lies outside (0,10] and becomes NA (drawn as "missing").
# include.lowest = TRUE closes the first bin at 0 so such counties stay in the
# lowest class (its label becomes "[0,10]").
christian$value <- cut(
  christian$value,
  breaks = c(0, 10, 20, 30, 40, 50, 60, 70, Inf),
  include.lowest = TRUE
)

# `c` is the scratch name this file reuses for each choropleth object.
c <- CountyChoropleth$new(christian)
c$title <- "Christian Population"
c$add_state_outline <- TRUE
c$set_num_colors(8)
# One fill colour per bin, in ascending bin order.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "white", "yellow", "salmon", "springgreen",
    "brown1", "deepskyblue", "darkmagenta", "darkblue"
  )
)
c$legend <- "Christian Percentage"
# Restrict the map to the 48 states listed (Alaska and Hawaii are not included).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_christian <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 31007, 49009, 46075, 48301, 49033, 48269, 51515, 8014,
## 32009, 31117, 49029, 16025
# Auto-print the stored Christian map object so the plot is drawn.
county_christian

Model Construction

Prepare Data for Modeling

# Multiply the decimal-valued columns by 100 so they are on the same scale as
# the other percentage values in `votes`. Each column is overwritten in place,
# in the order listed.
for (share_col in c(
  "Trump", "Clinton", "Obama", "Romney",
  "White", "Black", "Hispanic",
  "Clinton_Obama", "Trump_Romney", "per_shift"
)) {
  votes[[share_col]] <- votes[[share_col]] * 100
}

Predict Clinton-Obama Deviation

# Modelling frame for the Clinton-Obama deviation: columns are picked from
# `votes` by position and rows containing any NA are dropped.
# NOTE(review): the response `Clinton_Obama` is presumably column 77, the only
# index that differs from the other two model frames -- confirm.
CO_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:77, 83:95, 100)]
)

# Bounds of the search: intercept-only model and the model on every column.
null_CO <- lm(Clinton_Obama ~ 1, data = CO_Dev_Predict)
full_CO <- lm(Clinton_Obama ~ ., data = CO_Dev_Predict)

# Stepwise selection in both directions, starting from the intercept-only fit
# and allowed to grow up to the full model.
CO_Dev <- step(
  null_CO,
  scope = list(upper = full_CO),
  data = CO_Dev_Predict,
  direction = "both"
)

# Store the selected model's prediction for every row of `votes`.
votes$CO_Dev_Pred <- predict(CO_Dev, votes)

Predict Trump-Romney Deviation

# Modelling frame for the Trump-Romney deviation: columns are picked from
# `votes` by position and rows containing any NA are dropped.
# NOTE(review): the response `Trump_Romney` is presumably column 78, the only
# index that differs from the other two model frames -- confirm.
TR_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 78, 83:95, 100)]
)

# Bounds of the search: intercept-only model and the model on every column.
null_TR <- lm(Trump_Romney ~ 1, data = TR_Dev_Predict)
full_TR <- lm(Trump_Romney ~ ., data = TR_Dev_Predict)

# Stepwise selection in both directions, starting from the intercept-only fit
# and allowed to grow up to the full model.
TR_Dev <- step(
  null_TR,
  scope = list(upper = full_TR),
  data = TR_Dev_Predict,
  direction = "both"
)

# Store the selected model's prediction for every row of `votes`.
votes$TR_Dev_Pred <- predict(TR_Dev, votes)

Predict Overall Deviation

# Modelling frame for the overall shift: columns are picked from `votes` by
# position and rows containing any NA are dropped.
# NOTE(review): the response `per_shift` is presumably column 98, the only
# index that differs from the other two model frames -- confirm.
Overall_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 98, 100)]
)

# Bounds of the search: intercept-only model and the model on every column.
null_Overall <- lm(per_shift ~ 1, data = Overall_Dev_Predict)
full_Overall <- lm(per_shift ~ ., data = Overall_Dev_Predict)

# Stepwise selection in both directions, starting from the intercept-only fit
# and allowed to grow up to the full model.
Overall_Dev <- step(
  null_Overall,
  scope = list(upper = full_Overall),
  data = Overall_Dev_Predict,
  direction = "both"
)

# Store the selected model's prediction for every row of `votes`.
votes$Overall_Dev_Pred <- predict(Overall_Dev, votes)

Deviation Graphics: Clinton-Obama

# Print the coefficient table and fit statistics of the stepwise-selected
# Clinton-Obama model.
summary(CO_Dev)
## 
## Call:
## lm(formula = Clinton_Obama ~ `Foreign Born` + Black + Obama + 
##     Edu_batchelors + NonEnglish + Protestant + `% Female 2014` + 
##     `Median Value of Owner-Occupied Housing Units` + Income + 
##     Hispanic + White + votes_gop_2012 + `Median Household Income` + 
##     `Manufacturers Shipments - 2007` + `Merchant Wholesaler Sales - 2007` + 
##     `Persons/Household` + `Persons Under 18` + Mormon + Edu_highschool + 
##     `Hispanic-Owned Firms` + `Travel Time to Work` + population_change + 
##     Catholic + Density + total_votes_2012 + votes_dem_2012 + 
##     population2010 + Households + `Private Nonfarm Establishments 2013` + 
##     `Living in Same House 1+ Years` + `Homeownership Rate` + 
##     `Building Permits` + `Private Nonfarm Employment` + `Total Number of Firms` + 
##     `Accommodation and Food Service Sales - 2007`, data = CO_Dev_Predict)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.197 -1.395  0.033  1.332 13.576 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                    -1.205e+01  1.893e+00
## `Foreign Born`                                  3.307e-02  1.745e-02
## Black                                           1.706e-01  6.937e-03
## Obama                                          -2.241e-01  4.136e-03
## Edu_batchelors                                  3.360e-01  1.067e-02
## NonEnglish                                      5.391e-02  1.397e-02
## Protestant                                     -2.703e-02  5.351e-03
## `% Female 2014`                                 2.108e-01  2.287e-02
## `Median Value of Owner-Occupied Housing Units`  1.377e-05  1.148e-06
## Income                                         -7.876e-05  2.546e-05
## Hispanic                                        7.682e-02  9.086e-03
## White                                          -4.670e-02  6.696e-03
## votes_gop_2012                                 -3.426e-04  6.895e-05
## `Median Household Income`                      -2.228e-05  1.158e-05
## `Manufacturers Shipments - 2007`                6.281e-08  1.790e-08
## `Merchant Wholesaler Sales - 2007`             -3.791e-08  1.737e-08
## `Persons/Household`                             1.661e+00  3.438e-01
## `Persons Under 18`                             -1.304e-01  2.172e-02
## Mormon                                          4.633e-02  1.104e-02
## Edu_highschool                                 -3.837e-02  1.201e-02
## `Hispanic-Owned Firms`                          3.846e-02  9.685e-03
## `Travel Time to Work`                          -2.961e-02  1.113e-02
## population_change                               2.790e-02  1.257e-02
## Catholic                                       -1.157e-02  5.809e-03
## Density                                        -1.018e-04  3.865e-05
## total_votes_2012                                3.509e-04  6.914e-05
## votes_dem_2012                                 -3.549e-04  6.928e-05
## population2010                                 -9.648e-06  2.803e-06
## Households                                      2.445e-05  9.459e-06
## `Private Nonfarm Establishments 2013`          -2.103e-04  8.001e-05
## `Living in Same House 1+ Years`                 4.256e-02  1.321e-02
## `Homeownership Rate`                           -2.841e-02  9.159e-03
## `Building Permits`                             -1.063e-04  6.592e-05
## `Private Nonfarm Employment`                    6.774e-06  2.759e-06
## `Total Number of Firms`                         2.486e-05  1.686e-05
## `Accommodation and Food Service Sales - 2007`  -1.427e-07  9.784e-08
##                                                t value Pr(>|t|)    
## (Intercept)                                     -6.366 2.24e-10 ***
## `Foreign Born`                                   1.895 0.058199 .  
## Black                                           24.588  < 2e-16 ***
## Obama                                          -54.186  < 2e-16 ***
## Edu_batchelors                                  31.483  < 2e-16 ***
## NonEnglish                                       3.860 0.000116 ***
## Protestant                                      -5.052 4.64e-07 ***
## `% Female 2014`                                  9.217  < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units`  11.992  < 2e-16 ***
## Income                                          -3.094 0.001992 ** 
## Hispanic                                         8.455  < 2e-16 ***
## White                                           -6.975 3.74e-12 ***
## votes_gop_2012                                  -4.969 7.10e-07 ***
## `Median Household Income`                       -1.924 0.054468 .  
## `Manufacturers Shipments - 2007`                 3.509 0.000456 ***
## `Merchant Wholesaler Sales - 2007`              -2.182 0.029171 *  
## `Persons/Household`                              4.833 1.41e-06 ***
## `Persons Under 18`                              -6.005 2.13e-09 ***
## Mormon                                           4.195 2.80e-05 ***
## Edu_highschool                                  -3.195 0.001413 ** 
## `Hispanic-Owned Firms`                           3.971 7.31e-05 ***
## `Travel Time to Work`                           -2.661 0.007830 ** 
## population_change                                2.220 0.026488 *  
## Catholic                                        -1.992 0.046437 *  
## Density                                         -2.635 0.008460 ** 
## total_votes_2012                                 5.076 4.09e-07 ***
## votes_dem_2012                                  -5.122 3.21e-07 ***
## population2010                                  -3.442 0.000585 ***
## Households                                       2.584 0.009799 ** 
## `Private Nonfarm Establishments 2013`           -2.629 0.008615 ** 
## `Living in Same House 1+ Years`                  3.221 0.001289 ** 
## `Homeownership Rate`                            -3.102 0.001941 ** 
## `Building Permits`                              -1.613 0.106916    
## `Private Nonfarm Employment`                     2.456 0.014121 *  
## `Total Number of Firms`                          1.474 0.140459    
## `Accommodation and Food Service Sales - 2007`   -1.458 0.144885    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.206 on 3073 degrees of freedom
## Multiple R-squared:  0.804,  Adjusted R-squared:  0.8018 
## F-statistic: 360.3 on 35 and 3073 DF,  p-value: < 2.2e-16
# Model error for the Clinton-Obama deviation: observed value minus the
# stepwise model's prediction.
votes$model_error_CO <- votes$Clinton_Obama - votes$CO_Dev_Pred

# (region, value) frame for CountyChoropleth. The error column created above is
# looked up by name instead of the former hard-coded position (104), which was
# only correct when exactly three prediction columns had been appended first.
ME_CO <- votes[, c(1, match("model_error_CO", names(votes)))]
colnames(ME_CO) <- c("region", "value")

# Six classes, still matching the six fill colours below. The lowest break is
# -Inf rather than -10 so an error at or below -10 stays in the lowest class
# instead of becoming NA; the upper end was already open (Inf). The lowest
# legend label therefore reads "(-Inf,-5]".
ME_CO$value <- cut(ME_CO$value, breaks = c(-Inf, -5, -1, 1, 5, 10, Inf))

# `c` is the scratch name this file reuses for each choropleth object.
c <- CountyChoropleth$new(ME_CO)
c$title <- "Model Deviation: Clinton-Obama"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(6)
# One fill colour per bin in ascending order: reds for negative errors, white
# around zero, blues for positive errors.
c$ggplot_scale <- scale_fill_manual(
  values = c("red", "indianred1", "white", "lightcyan1", "dodgerblue", "darkblue")
)
# Restrict the map to the 48 states listed (Alaska and Hawaii are not included).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_CO <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
# Auto-print the stored Clinton-Obama model-error map so the plot is drawn.
county_ME_CO

Deviation Graphics: Trump-Romney

# Print the coefficient table and fit statistics of the stepwise-selected
# Trump-Romney model.
summary(TR_Dev)
## 
## Call:
## lm(formula = Trump_Romney ~ Edu_batchelors + `Persons/Household` + 
##     Mormon + Romney + Black + Hispanic + Christian + White + 
##     Income + `Median Value of Owner-Occupied Housing Units` + 
##     `Travel Time to Work` + `% Female 2014` + votes_gop_2012 + 
##     `Nonemployer Establishments - 2013` + Edu_highschool + `Land Area (in sq miles)` + 
##     `Manufacturers Shipments - 2007` + `Persons Under 5` + Other_Religion + 
##     `Merchant Wholesaler Sales - 2007` + Orthodox + `Median Household Income` + 
##     Poverty + `Hispanic-Owned Firms` + `Private Nonfarm Employment` + 
##     Density + Veterans + Jewish + `Housing Units in Multi-Unit Structures` + 
##     Obama + `Accommodation and Food Service Sales - 2007` + `Black-Owned Firms` + 
##     `Homeownership Rate` + `% Change - Private Nonfarm Employment`, 
##     data = TR_Dev_Predict)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.3646  -1.4701   0.0437   1.6070  18.0019 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     4.489e+01  6.852e+00
## Edu_batchelors                                 -4.210e-01  1.315e-02
## `Persons/Household`                            -2.480e+00  4.041e-01
## Mormon                                         -2.518e-01  1.374e-02
## Romney                                         -2.929e-01  6.622e-02
## Black                                          -1.444e-01  9.071e-03
## Hispanic                                       -1.195e-01  6.662e-03
## Christian                                       2.357e-02  3.811e-03
## White                                           4.487e-02  8.625e-03
## Income                                          1.868e-04  3.063e-05
## `Median Value of Owner-Occupied Housing Units` -2.004e-05  1.322e-06
## `Travel Time to Work`                           5.907e-02  1.360e-02
## `% Female 2014`                                -1.231e-01  2.621e-02
## votes_gop_2012                                 -1.663e-05  4.492e-06
## `Nonemployer Establishments - 2013`             2.898e-05  5.693e-06
## Edu_highschool                                 -5.161e-02  1.467e-02
## `Land Area (in sq miles)`                      -1.710e-04  4.487e-05
## `Manufacturers Shipments - 2007`               -5.789e-08  1.954e-08
## `Persons Under 5`                              -2.449e-01  6.499e-02
## Other_Religion                                  4.432e-02  1.730e-02
## `Merchant Wholesaler Sales - 2007`              7.415e-08  1.927e-08
## Orthodox                                        6.386e-01  2.859e-01
## `Median Household Income`                       5.861e-05  1.502e-05
## Poverty                                         5.837e-02  1.788e-02
## `Hispanic-Owned Firms`                         -2.763e-02  1.164e-02
## `Private Nonfarm Employment`                   -8.215e-06  1.956e-06
## Density                                         1.256e-04  4.285e-05
## Veterans                                        2.484e-05  1.231e-05
## Jewish                                          6.635e-01  2.557e-01
## `Housing Units in Multi-Unit Structures`       -3.147e-02  1.198e-02
## Obama                                          -1.031e-01  6.684e-02
## `Accommodation and Food Service Sales - 2007`   2.106e-07  1.211e-07
## `Black-Owned Firms`                             1.669e-02  1.032e-02
## `Homeownership Rate`                           -1.999e-02  1.262e-02
## `% Change - Private Nonfarm Employment`        -1.211e-02  8.407e-03
##                                                t value Pr(>|t|)    
## (Intercept)                                      6.550 6.71e-11 ***
## Edu_batchelors                                 -32.004  < 2e-16 ***
## `Persons/Household`                             -6.136 9.54e-10 ***
## Mormon                                         -18.325  < 2e-16 ***
## Romney                                          -4.423 1.01e-05 ***
## Black                                          -15.920  < 2e-16 ***
## Hispanic                                       -17.940  < 2e-16 ***
## Christian                                        6.185 7.04e-10 ***
## White                                            5.203 2.09e-07 ***
## Income                                           6.099 1.20e-09 ***
## `Median Value of Owner-Occupied Housing Units` -15.156  < 2e-16 ***
## `Travel Time to Work`                            4.343 1.45e-05 ***
## `% Female 2014`                                 -4.696 2.77e-06 ***
## votes_gop_2012                                  -3.702 0.000217 ***
## `Nonemployer Establishments - 2013`              5.090 3.79e-07 ***
## Edu_highschool                                  -3.518 0.000441 ***
## `Land Area (in sq miles)`                       -3.810 0.000142 ***
## `Manufacturers Shipments - 2007`                -2.963 0.003066 ** 
## `Persons Under 5`                               -3.768 0.000168 ***
## Other_Religion                                   2.562 0.010451 *  
## `Merchant Wholesaler Sales - 2007`               3.847 0.000122 ***
## Orthodox                                         2.233 0.025594 *  
## `Median Household Income`                        3.902 9.75e-05 ***
## Poverty                                          3.265 0.001107 ** 
## `Hispanic-Owned Firms`                          -2.374 0.017678 *  
## `Private Nonfarm Employment`                    -4.199 2.76e-05 ***
## Density                                          2.930 0.003410 ** 
## Veterans                                         2.018 0.043691 *  
## Jewish                                           2.595 0.009512 ** 
## `Housing Units in Multi-Unit Structures`        -2.628 0.008643 ** 
## Obama                                           -1.543 0.122909    
## `Accommodation and Food Service Sales - 2007`    1.739 0.082180 .  
## `Black-Owned Firms`                              1.618 0.105734    
## `Homeownership Rate`                            -1.584 0.113403    
## `% Change - Private Nonfarm Employment`         -1.440 0.149921    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.713 on 3074 degrees of freedom
## Multiple R-squared:  0.7663, Adjusted R-squared:  0.7637 
## F-statistic: 296.4 on 34 and 3074 DF,  p-value: < 2.2e-16
# Per-county residual of the Trump-Romney model: observed value minus the
# model prediction (positive = observed exceeds predicted).
votes$model_error_TR <- votes$Trump_Romney - votes$TR_Dev_Pred

# Two-column frame for the choropleth. The residual is selected by NAME
# instead of by position (previously column 105), so the map cannot silently
# pick up a different column if the column count of `votes` changes upstream.
# The first column of `votes` supplies the region codes, as in the other
# deviation maps (presumably the FIPS merge key -- confirm).
ME_TR <- votes[, c(names(votes)[1], "model_error_TR")]
colnames(ME_TR) <- c("region", "value")

# Bin the residuals into 7 intervals, one per fill colour below.
ME_TR$value <- cut(ME_TR$value, breaks = c(-25, -10, -5, -1, 1, 5, 10, Inf))

c <- CountyChoropleth$new(ME_TR)
c$title <- "Model Deviation: Trump-Romney"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# Fill colours listed from the lowest bin to the highest.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_TR <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_ME_TR

Deviation Graphics: Overall

# Print the summary of the fitted linear model Overall_Dev (call, residual
# quantiles, coefficient table, fit statistics); the echoed output follows.
summary(Overall_Dev)
## 
## Call:
## lm(formula = per_shift ~ Edu_batchelors + `Persons/Household` + 
##     Romney + Black + Hispanic + Mormon + Christian + White + 
##     `Median Value of Owner-Occupied Housing Units` + `Median Household Income` + 
##     votes_gop_2012 + `Nonemployer Establishments - 2013` + `% Female 2014` + 
##     `Travel Time to Work` + Income + `Manufacturers Shipments - 2007` + 
##     `Merchant Wholesaler Sales - 2007` + `Hispanic-Owned Firms` + 
##     NonEnglish + `Land Area (in sq miles)` + `Private Nonfarm Employment` + 
##     Density + Poverty + `Accommodation and Food Service Sales - 2007` + 
##     Other_Religion + Orthodox + `Homeownership Rate`, data = Overall_Dev_Predict)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.1120  -2.7512   0.0554   2.8877  21.0307 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     6.269e+01  3.142e+00
## Edu_batchelors                                 -7.741e-01  2.015e-02
## `Persons/Household`                            -3.785e+00  6.466e-01
## Romney                                         -4.155e-01  8.061e-03
## Black                                          -3.153e-01  1.411e-02
## Hispanic                                       -1.862e-01  1.892e-02
## Mormon                                         -2.889e-01  2.334e-02
## Christian                                       3.595e-02  6.439e-03
## White                                           8.760e-02  1.430e-02
## `Median Value of Owner-Occupied Housing Units` -3.547e-05  2.159e-06
## `Median Household Income`                       9.860e-05  2.498e-05
## votes_gop_2012                                 -1.997e-05  5.055e-06
## `Nonemployer Establishments - 2013`             5.304e-05  9.717e-06
## `% Female 2014`                                -3.182e-01  4.291e-02
## `Travel Time to Work`                           7.542e-02  2.219e-02
## Income                                          2.528e-04  5.160e-05
## `Manufacturers Shipments - 2007`               -1.233e-07  3.324e-08
## `Merchant Wholesaler Sales - 2007`              1.245e-07  3.016e-08
## `Hispanic-Owned Firms`                         -6.407e-02  1.994e-02
## NonEnglish                                     -7.185e-02  2.352e-02
## `Land Area (in sq miles)`                      -1.827e-04  7.431e-05
## `Private Nonfarm Employment`                   -1.250e-05  3.229e-06
## Density                                         2.107e-04  6.942e-05
## Poverty                                         6.461e-02  2.886e-02
## `Accommodation and Food Service Sales - 2007`   4.275e-07  2.040e-07
## Other_Religion                                  6.023e-02  2.956e-02
## Orthodox                                        9.071e-01  4.852e-01
## `Homeownership Rate`                            2.590e-02  1.619e-02
##                                                t value Pr(>|t|)    
## (Intercept)                                     19.955  < 2e-16 ***
## Edu_batchelors                                 -38.412  < 2e-16 ***
## `Persons/Household`                             -5.854 5.29e-09 ***
## Romney                                         -51.544  < 2e-16 ***
## Black                                          -22.345  < 2e-16 ***
## Hispanic                                        -9.842  < 2e-16 ***
## Mormon                                         -12.378  < 2e-16 ***
## Christian                                        5.583 2.57e-08 ***
## White                                            6.128 1.00e-09 ***
## `Median Value of Owner-Occupied Housing Units` -16.429  < 2e-16 ***
## `Median Household Income`                        3.947 8.09e-05 ***
## votes_gop_2012                                  -3.950 7.99e-05 ***
## `Nonemployer Establishments - 2013`              5.459 5.17e-08 ***
## `% Female 2014`                                 -7.415 1.57e-13 ***
## `Travel Time to Work`                            3.398 0.000687 ***
## Income                                           4.899 1.01e-06 ***
## `Manufacturers Shipments - 2007`                -3.710 0.000211 ***
## `Merchant Wholesaler Sales - 2007`               4.128 3.76e-05 ***
## `Hispanic-Owned Firms`                          -3.213 0.001326 ** 
## NonEnglish                                      -3.055 0.002272 ** 
## `Land Area (in sq miles)`                       -2.458 0.014022 *  
## `Private Nonfarm Employment`                    -3.871 0.000111 ***
## Density                                          3.035 0.002428 ** 
## Poverty                                          2.239 0.025224 *  
## `Accommodation and Food Service Sales - 2007`    2.096 0.036164 *  
## Other_Religion                                   2.037 0.041696 *  
## Orthodox                                         1.869 0.061659 .  
## `Homeownership Rate`                             1.600 0.109715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.663 on 3081 degrees of freedom
## Multiple R-squared:  0.7946, Adjusted R-squared:  0.7928 
## F-statistic: 441.6 on 27 and 3081 DF,  p-value: < 2.2e-16
# Per-county residual of the overall-shift model: observed per_shift minus
# the model prediction (positive = observed exceeds predicted).
votes$model_error_overall <- votes$per_shift - votes$Overall_Dev_Pred

# Two-column frame for the choropleth. The residual is selected by NAME
# instead of by position (previously column 106), so the map cannot silently
# pick up a different column if the column count of `votes` changes upstream.
# The first column of `votes` supplies the region codes, as in the other
# deviation maps (presumably the FIPS merge key -- confirm).
ME_Overall <- votes[, c(names(votes)[1], "model_error_overall")]
colnames(ME_Overall) <- c("region", "value")

# Bin the residuals into 7 intervals, one per fill colour below.
ME_Overall$value <- cut(
  ME_Overall$value,
  breaks = c(-30, -10, -5, -1, 1, 5, 10, Inf)
)

c <- CountyChoropleth$new(ME_Overall)
c$title <- "Model Deviation: 2016 Election Results"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# Fill colours listed from the lowest bin to the highest.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_Overall <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_ME_Overall

Predict Trump and Clinton Percentage

# Candidate predictor columns shared by both percentage models. The response
# column is prepended per candidate (8 for the Clinton model, 9 for Trump).
pct_predictor_cols <- c(19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100)

# Modelling frames: response + candidate predictors, complete rows only.
Predict_Clinton <- na.omit(votes[, c(8, pct_predictor_cols)])
Predict_Trump <- na.omit(votes[, c(9, pct_predictor_cols)])

# Clinton: stepwise search (both directions) starting from the intercept-only
# model, bounded above by the model with every candidate predictor.
# NOTE(review): `data` is not a formal argument of step(); it is passed
# through `...` and kept here only to leave the call unchanged.
null_Clinton <- lm(Clinton ~ 1, data = Predict_Clinton)
full_Clinton <- lm(Clinton ~ ., data = Predict_Clinton)
Clinton_Dev <- step(
  null_Clinton,
  scope = list(upper = full_Clinton),
  data = Predict_Clinton,
  direction = "both"
)
# Predict for every row of `votes`, not just the complete rows used to fit.
votes$Clinton_Percent_Predict <- predict(Clinton_Dev, votes)

# Trump: same procedure with the Trump response.
null_Trump <- lm(Trump ~ 1, data = Predict_Trump)
full_Trump <- lm(Trump ~ ., data = Predict_Trump)
Trump_Dev <- step(
  null_Trump,
  scope = list(upper = full_Trump),
  data = Predict_Trump,
  direction = "both"
)
votes$Trump_Percent_Predict <- predict(Trump_Dev, votes)

Analyze Predicted Percentage

# Clinton: print the summary of Clinton_Dev, the model returned by step()
# for the Clinton percentage; the echoed output follows.
summary(Clinton_Dev)
## 
## Call:
## lm(formula = Clinton ~ Obama + `Foreign Born` + Black + Edu_batchelors + 
##     NonEnglish + Protestant + `% Female 2014` + `Median Value of Owner-Occupied Housing Units` + 
##     Income + Hispanic + White + Veterans + `Nonemployer Establishments - 2013` + 
##     `Manufacturers Shipments - 2007` + Edu_highschool + `Hispanic-Owned Firms` + 
##     Mormon + `Persons Under 18` + Density + `Homeownership Rate` + 
##     `Persons/Household` + `Median Household Income` + `Living in Same House 1+ Years` + 
##     `Merchant Wholesaler Sales - 2007` + `Private Nonfarm Employment` + 
##     population_change + `Accommodation and Food Service Sales - 2007` + 
##     `Travel Time to Work` + Catholic + `Persons Under 5`, data = Predict_Clinton)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1088 -1.3843  0.0161  1.3411 13.5568 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                    -1.127e+01  1.928e+00
## Obama                                           7.770e-01  4.073e-03
## `Foreign Born`                                  3.118e-02  1.725e-02
## Black                                           1.686e-01  6.925e-03
## Edu_batchelors                                  3.445e-01  1.054e-02
## NonEnglish                                      5.385e-02  1.402e-02
## Protestant                                     -2.949e-02  5.330e-03
## `% Female 2014`                                 2.101e-01  2.300e-02
## `Median Value of Owner-Occupied Housing Units`  1.338e-05  1.108e-06
## Income                                         -7.167e-05  2.523e-05
## Hispanic                                        7.556e-02  9.076e-03
## White                                          -4.592e-02  6.750e-03
## Veterans                                        1.770e-05  6.692e-06
## `Nonemployer Establishments - 2013`            -2.429e-05  4.745e-06
## `Manufacturers Shipments - 2007`                6.367e-08  1.590e-08
## Edu_highschool                                 -4.245e-02  1.210e-02
## `Hispanic-Owned Firms`                          3.479e-02  9.615e-03
## Mormon                                          4.558e-02  1.110e-02
## `Persons Under 18`                             -8.695e-02  3.220e-02
## Density                                        -1.292e-04  3.320e-05
## `Homeownership Rate`                           -3.152e-02  9.264e-03
## `Persons/Household`                             1.545e+00  3.418e-01
## `Median Household Income`                      -2.617e-05  1.150e-05
## `Living in Same House 1+ Years`                 3.951e-02  1.336e-02
## `Merchant Wholesaler Sales - 2007`             -5.285e-08  1.539e-08
## `Private Nonfarm Employment`                    5.876e-06  1.595e-06
## population_change                               3.285e-02  1.246e-02
## `Accommodation and Food Service Sales - 2007`  -2.008e-07  9.743e-08
## `Travel Time to Work`                          -2.642e-02  1.112e-02
## Catholic                                       -1.074e-02  5.788e-03
## `Persons Under 5`                              -1.306e-01  8.055e-02
##                                                t value Pr(>|t|)    
## (Intercept)                                     -5.843 5.68e-09 ***
## Obama                                          190.755  < 2e-16 ***
## `Foreign Born`                                   1.807 0.070821 .  
## Black                                           24.352  < 2e-16 ***
## Edu_batchelors                                  32.671  < 2e-16 ***
## NonEnglish                                       3.840 0.000126 ***
## Protestant                                      -5.532 3.43e-08 ***
## `% Female 2014`                                  9.132  < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units`  12.071  < 2e-16 ***
## Income                                          -2.841 0.004523 ** 
## Hispanic                                         8.326  < 2e-16 ***
## White                                           -6.803 1.23e-11 ***
## Veterans                                         2.645 0.008213 ** 
## `Nonemployer Establishments - 2013`             -5.120 3.24e-07 ***
## `Manufacturers Shipments - 2007`                 4.005 6.36e-05 ***
## Edu_highschool                                  -3.507 0.000460 ***
## `Hispanic-Owned Firms`                           3.619 0.000301 ***
## Mormon                                           4.107 4.11e-05 ***
## `Persons Under 18`                              -2.700 0.006968 ** 
## Density                                         -3.891 0.000102 ***
## `Homeownership Rate`                            -3.402 0.000677 ***
## `Persons/Household`                              4.520 6.41e-06 ***
## `Median Household Income`                       -2.276 0.022889 *  
## `Living in Same House 1+ Years`                  2.957 0.003130 ** 
## `Merchant Wholesaler Sales - 2007`              -3.435 0.000601 ***
## `Private Nonfarm Employment`                     3.684 0.000234 ***
## population_change                                2.637 0.008410 ** 
## `Accommodation and Food Service Sales - 2007`   -2.061 0.039420 *  
## `Travel Time to Work`                           -2.377 0.017533 *  
## Catholic                                        -1.855 0.063671 .  
## `Persons Under 5`                               -1.622 0.104997    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.217 on 3078 degrees of freedom
## Multiple R-squared:  0.9793, Adjusted R-squared:  0.9791 
## F-statistic:  4866 on 30 and 3078 DF,  p-value: < 2.2e-16
# Per-county deviation of Clinton's observed percentage from the model
# prediction (positive = observed exceeds predicted), keyed by the first
# column of `votes` as the region code.
Clinton_Deviation <- data.frame(votes[, 1])
Clinton_Deviation$deviation <- votes$Clinton - votes$Clinton_Percent_Predict
colnames(Clinton_Deviation) <- c("region", "value")
# Bin the deviations into 6 intervals, one per fill colour below.
Clinton_Deviation$value <- cut(
  Clinton_Deviation$value,
  breaks = c(-10, -5, -1, 1, 5, 10, Inf)
)

c <- CountyChoropleth$new(Clinton_Deviation)
c$title <- "Clinton Percentage Deviation"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
# 6 bins / 6 colours (the previous value of 7 did not match the scale).
c$set_num_colors(6)
# Fill colours listed from the lowest bin to the highest.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "red", "indianred1", "white", "lightcyan1", "dodgerblue", "darkblue"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
# Render once, into the Clinton variable only. A copy-pasted extra render
# used to store this Clinton map in county_Trump_Dev as well.
county_Clinton_Dev <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_Clinton_Dev

# Trump: print the summary of Trump_Dev, the model returned by step()
# for the Trump percentage; the echoed output follows.
summary(Trump_Dev)
## 
## Call:
## lm(formula = Trump ~ Obama + Edu_batchelors + `Persons/Household` + 
##     Black + Mormon + Hispanic + Romney + Christian + White + 
##     Income + `Median Value of Owner-Occupied Housing Units` + 
##     `Travel Time to Work` + `% Female 2014` + Edu_highschool + 
##     `Nonemployer Establishments - 2013` + `Manufacturers Shipments - 2007` + 
##     `Land Area (in sq miles)` + `Persons Under 5` + Other_Religion + 
##     `Hispanic-Owned Firms` + Orthodox + `Merchant Wholesaler Sales - 2007` + 
##     `Private Nonfarm Employment` + Density + `Accommodation and Food Service Sales - 2007` + 
##     `Median Household Income` + Poverty + `Housing Units in Multi-Unit Structures` + 
##     Jewish + `Homeownership Rate` + `Black-Owned Firms` + `% Change - Private Nonfarm Employment`, 
##     data = Predict_Trump)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.899  -1.461   0.013   1.619  18.015 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     4.825e+01  6.799e+00
## Obama                                          -1.217e-01  6.669e-02
## Edu_batchelors                                 -4.294e-01  1.296e-02
## `Persons/Household`                            -2.591e+00  4.034e-01
## Black                                          -1.464e-01  9.070e-03
## Mormon                                         -2.502e-01  1.375e-02
## Hispanic                                       -1.182e-01  6.654e-03
## Romney                                          6.852e-01  6.600e-02
## Christian                                       2.424e-02  3.802e-03
## White                                           4.403e-02  8.616e-03
## Income                                          1.892e-04  3.068e-05
## `Median Value of Owner-Occupied Housing Units` -1.938e-05  1.314e-06
## `Travel Time to Work`                           5.505e-02  1.347e-02
## `% Female 2014`                                -1.309e-01  2.614e-02
## Edu_highschool                                 -5.089e-02  1.464e-02
## `Nonemployer Establishments - 2013`             2.827e-05  5.646e-06
## `Manufacturers Shipments - 2007`               -6.086e-08  1.935e-08
## `Land Area (in sq miles)`                      -1.813e-04  4.458e-05
## `Persons Under 5`                              -2.393e-01  6.508e-02
## Other_Religion                                  4.307e-02  1.732e-02
## `Hispanic-Owned Firms`                         -2.987e-02  1.161e-02
## Orthodox                                        6.061e-01  2.860e-01
## `Merchant Wholesaler Sales - 2007`              6.665e-08  1.748e-08
## `Private Nonfarm Employment`                   -9.670e-06  1.749e-06
## Density                                         1.674e-04  3.937e-05
## `Accommodation and Food Service Sales - 2007`   2.676e-07  1.191e-07
## `Median Household Income`                       5.470e-05  1.501e-05
## Poverty                                         5.698e-02  1.788e-02
## `Housing Units in Multi-Unit Structures`       -3.918e-02  1.178e-02
## Jewish                                          5.920e-01  2.555e-01
## `Homeownership Rate`                           -2.515e-02  1.256e-02
## `Black-Owned Firms`                             1.810e-02  1.031e-02
## `% Change - Private Nonfarm Employment`        -1.259e-02  8.423e-03
##                                                t value Pr(>|t|)    
## (Intercept)                                      7.096 1.59e-12 ***
## Obama                                           -1.825 0.068057 .  
## Edu_batchelors                                 -33.139  < 2e-16 ***
## `Persons/Household`                             -6.423 1.54e-10 ***
## Black                                          -16.139  < 2e-16 ***
## Mormon                                         -18.196  < 2e-16 ***
## Hispanic                                       -17.763  < 2e-16 ***
## Romney                                          10.382  < 2e-16 ***
## Christian                                        6.376 2.09e-10 ***
## White                                            5.110 3.41e-07 ***
## Income                                           6.168 7.82e-10 ***
## `Median Value of Owner-Occupied Housing Units` -14.752  < 2e-16 ***
## `Travel Time to Work`                            4.086 4.50e-05 ***
## `% Female 2014`                                 -5.008 5.80e-07 ***
## Edu_highschool                                  -3.477 0.000514 ***
## `Nonemployer Establishments - 2013`              5.008 5.81e-07 ***
## `Manufacturers Shipments - 2007`                -3.145 0.001677 ** 
## `Land Area (in sq miles)`                       -4.068 4.86e-05 ***
## `Persons Under 5`                               -3.677 0.000240 ***
## Other_Religion                                   2.487 0.012946 *  
## `Hispanic-Owned Firms`                          -2.573 0.010135 *  
## Orthodox                                         2.119 0.034150 *  
## `Merchant Wholesaler Sales - 2007`               3.813 0.000140 ***
## `Private Nonfarm Employment`                    -5.529 3.49e-08 ***
## Density                                          4.251 2.19e-05 ***
## `Accommodation and Food Service Sales - 2007`    2.247 0.024742 *  
## `Median Household Income`                        3.643 0.000274 ***
## Poverty                                          3.186 0.001455 ** 
## `Housing Units in Multi-Unit Structures`        -3.324 0.000897 ***
## Jewish                                           2.317 0.020544 *  
## `Homeownership Rate`                            -2.002 0.045351 *  
## `Black-Owned Firms`                              1.756 0.079248 .  
## `% Change - Private Nonfarm Employment`         -1.494 0.135179    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.719 on 3076 degrees of freedom
## Multiple R-squared:  0.9701, Adjusted R-squared:  0.9698 
## F-statistic:  3119 on 32 and 3076 DF,  p-value: < 2.2e-16
# Per-county deviation of Trump's observed percentage from the model
# prediction, keyed by the first column of `votes` as the region code.
Trump_Deviation <- data.frame(
  region = votes[, 1],
  value = votes$Trump - votes$Trump_Percent_Predict
)
# Bin the deviations into 7 intervals, one per fill colour below.
Trump_Deviation$value <- cut(
  Trump_Deviation$value,
  breaks = c(-25, -10, -5, -1, 1, 5, 10, Inf)
)

c <- CountyChoropleth$new(Trump_Deviation)
c$title <- "Trump Percentage Deviation"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# Fill colours listed from the lowest bin to the highest.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_Trump_Dev <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_Trump_Dev

Predict Total Votes

# Candidate predictor columns shared by both vote-count models. The response
# column is prepended per candidate (5 for votes_dem_2016, 6 for
# votes_gop_2016, as used in the formulas below).
votes_predictor_cols <- c(
  14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100
)

# Modelling frames: response + candidate predictors, complete rows only.
Predict_Votes_Clinton <- na.omit(votes[, c(5, votes_predictor_cols)])
Predict_Votes_Trump <- na.omit(votes[, c(6, votes_predictor_cols)])

# Clinton: stepwise search (both directions) starting from the intercept-only
# model, bounded above by the model with every candidate predictor.
# NOTE(review): `data` is not a formal argument of step(); it is passed
# through `...` and kept here only to leave the call unchanged.
null_Votes_Clinton <- lm(votes_dem_2016 ~ 1, data = Predict_Votes_Clinton)
full_Votes_Clinton <- lm(votes_dem_2016 ~ ., data = Predict_Votes_Clinton)
Clinton_Votes_Dev <- step(
  null_Votes_Clinton,
  scope = list(upper = full_Votes_Clinton),
  data = Predict_Votes_Clinton,
  direction = "both"
)
# Predict for every row of `votes`, not just the complete rows used to fit.
votes$Clinton_Votes_Predict <- predict(Clinton_Votes_Dev, votes)

# Trump: same procedure with the votes_gop_2016 response.
null_Votes_Trump <- lm(votes_gop_2016 ~ 1, data = Predict_Votes_Trump)
full_Votes_Trump <- lm(votes_gop_2016 ~ ., data = Predict_Votes_Trump)
Trump_Votes_Dev <- step(
  null_Votes_Trump,
  scope = list(upper = full_Votes_Trump),
  data = Predict_Votes_Trump,
  direction = "both"
)
votes$Trump_Votes_Predict <- predict(Trump_Votes_Dev, votes)

Analyze Total Votes

# Print the stepwise-selected Clinton vote-count model: call, residual
# quantiles, coefficient table, and overall fit statistics.
summary(Clinton_Votes_Dev)
## 
## Call:
## lm(formula = votes_dem_2016 ~ votes_dem_2012 + `Nonemployer Establishments - 2013` + 
##     `Private Nonfarm Employment` + votes_gop_2012 + `Foreign Born` + 
##     `Manufacturers Shipments - 2007` + total_votes_2012 + `Housing Units 2014` + 
##     population2014 + population2010 + `Total Number of Firms` + 
##     Veterans + age65plus + Households + `Median Value of Owner-Occupied Housing Units` + 
##     `Housing Units in Multi-Unit Structures` + `Merchant Wholesaler Sales - 2007` + 
##     Hindu + Buddhist + Obama + Poverty + `Median Household Income` + 
##     population_change + White + Density + `Hispanic-Owned Firms` + 
##     `Persons Under 18` + Orthodox + Hispanic + NonEnglish + `Retail Sales - 2007` + 
##     `Private Nonfarm Establishments 2013` + Protestant, data = Predict_Votes_Clinton)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -60622   -577    115    698  67806 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                    -8.581e+02  1.813e+03
## votes_dem_2012                                  1.738e+00  1.473e-01
## `Nonemployer Establishments - 2013`            -5.240e-01  7.061e-02
## `Private Nonfarm Employment`                    2.142e-02  4.849e-03
## votes_gop_2012                                  7.056e-01  1.459e-01
## `Foreign Born`                                  1.002e+02  3.153e+01
## `Manufacturers Shipments - 2007`               -8.132e-05  3.297e-05
## total_votes_2012                               -7.815e-01  1.463e-01
## `Housing Units 2014`                           -1.449e-01  1.476e-02
## population2014                                  4.417e-01  1.416e-02
## population2010                                 -4.341e-01  1.449e-02
## `Total Number of Firms`                         6.543e-01  8.445e-02
## Veterans                                       -2.424e-01  2.650e-02
## age65plus                                       9.401e+01  2.860e+01
## Households                                      1.656e-01  2.580e-02
## `Median Value of Owner-Occupied Housing Units`  9.597e-03  1.947e-03
## `Housing Units in Multi-Unit Structures`       -3.756e+01  1.394e+01
## `Merchant Wholesaler Sales - 2007`             -1.335e-04  3.143e-05
## Hindu                                           2.981e+03  7.133e+02
## Buddhist                                       -2.497e+03  6.305e+02
## Obama                                          -6.072e+01  7.643e+00
## Poverty                                         1.125e+02  2.450e+01
## `Median Household Income`                       6.184e-02  1.672e-02
## population_change                              -9.928e+01  2.276e+01
## White                                          -3.157e+01  6.946e+00
## Density                                        -3.065e-01  7.859e-02
## `Hispanic-Owned Firms`                          5.612e+01  1.759e+01
## `Persons Under 18`                             -9.458e+01  3.250e+01
## Orthodox                                       -9.151e+02  4.279e+02
## Hispanic                                       -4.545e+01  1.597e+01
## NonEnglish                                      4.866e+01  2.276e+01
## `Retail Sales - 2007`                          -3.713e-04  1.599e-04
## `Private Nonfarm Establishments 2013`           2.970e-01  1.730e-01
## Protestant                                      1.407e+01  9.408e+00
##                                                t value Pr(>|t|)    
## (Intercept)                                     -0.473 0.636044    
## votes_dem_2012                                  11.798  < 2e-16 ***
## `Nonemployer Establishments - 2013`             -7.421 1.50e-13 ***
## `Private Nonfarm Employment`                     4.417 1.04e-05 ***
## votes_gop_2012                                   4.834 1.40e-06 ***
## `Foreign Born`                                   3.178 0.001500 ** 
## `Manufacturers Shipments - 2007`                -2.466 0.013703 *  
## total_votes_2012                                -5.341 9.94e-08 ***
## `Housing Units 2014`                            -9.821  < 2e-16 ***
## population2014                                  31.186  < 2e-16 ***
## population2010                                 -29.948  < 2e-16 ***
## `Total Number of Firms`                          7.748 1.26e-14 ***
## Veterans                                        -9.149  < 2e-16 ***
## age65plus                                        3.288 0.001021 ** 
## Households                                       6.418 1.60e-10 ***
## `Median Value of Owner-Occupied Housing Units`   4.930 8.66e-07 ***
## `Housing Units in Multi-Unit Structures`        -2.695 0.007081 ** 
## `Merchant Wholesaler Sales - 2007`              -4.247 2.23e-05 ***
## Hindu                                            4.179 3.01e-05 ***
## Buddhist                                        -3.961 7.62e-05 ***
## Obama                                           -7.944 2.72e-15 ***
## Poverty                                          4.593 4.54e-06 ***
## `Median Household Income`                        3.698 0.000221 ***
## population_change                               -4.362 1.33e-05 ***
## White                                           -4.545 5.69e-06 ***
## Density                                         -3.901 9.80e-05 ***
## `Hispanic-Owned Firms`                           3.190 0.001435 ** 
## `Persons Under 18`                              -2.910 0.003634 ** 
## Orthodox                                        -2.138 0.032560 *  
## Hispanic                                        -2.846 0.004460 ** 
## NonEnglish                                       2.138 0.032636 *  
## `Retail Sales - 2007`                           -2.321 0.020326 *  
## `Private Nonfarm Establishments 2013`            1.717 0.086111 .  
## Protestant                                       1.495 0.134972    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4042 on 3075 degrees of freedom
## Multiple R-squared:  0.9969, Adjusted R-squared:  0.9968 
## F-statistic: 2.974e+04 on 33 and 3075 DF,  p-value: < 2.2e-16
# Print the stepwise-selected Trump vote-count model: call, residual
# quantiles, coefficient table, and overall fit statistics.
summary(Trump_Votes_Dev)
## 
## Call:
## lm(formula = votes_gop_2016 ~ votes_gop_2012 + `Private Nonfarm Employment` + 
##     `Housing Units 2014` + Density + population2014 + `Private Nonfarm Establishments 2013` + 
##     population2010 + total_votes_2012 + votes_dem_2012 + `Total Number of Firms` + 
##     `Accommodation and Food Service Sales - 2007` + `Building Permits` + 
##     Orthodox + Black + Mormon + population_change + `Persons Under 5` + 
##     Edu_batchelors + Romney + `Median Household Income` + `% Female 2014` + 
##     age65plus + `Median Value of Owner-Occupied Housing Units` + 
##     `Foreign Born` + `Persons Under 18` + Veterans + Hispanic + 
##     Protestant + Catholic + `Hispanic-Owned Firms` + `Manufacturers Shipments - 2007` + 
##     Women + `Black-Owned Firms` + Income + `Retail Sales - 2007` + 
##     Jewish, data = Predict_Votes_Trump)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -54815   -862   -118    659  45629 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                    -2.573e+03  1.809e+03
## votes_gop_2012                                  3.170e+00  1.260e-01
## `Private Nonfarm Employment`                   -1.158e-01  4.580e-03
## `Housing Units 2014`                            7.389e-02  1.001e-02
## Density                                         3.893e-01  6.653e-02
## population2014                                 -2.139e-01  1.645e-02
## `Private Nonfarm Establishments 2013`           2.026e+00  1.523e-01
## population2010                                  2.159e-01  1.696e-02
## total_votes_2012                               -2.196e+00  1.258e-01
## votes_dem_2012                                  2.166e+00  1.273e-01
## `Total Number of Firms`                        -2.340e-01  3.303e-02
## `Accommodation and Food Service Sales - 2007`   1.694e-03  1.723e-04
## `Building Permits`                              1.336e+00  1.927e-01
## Orthodox                                        1.898e+03  4.000e+02
## Black                                          -2.860e+01  7.420e+00
## Mormon                                         -4.510e+01  1.856e+01
## population_change                               1.433e+02  2.183e+01
## `Persons Under 5`                              -4.508e+02  1.293e+02
## Edu_batchelors                                 -9.937e+01  1.587e+01
## Romney                                         -2.268e+01  6.733e+00
## `Median Household Income`                       1.041e-01  1.832e-02
## `% Female 2014`                                 2.406e+02  4.065e+01
## age65plus                                      -6.880e+01  2.838e+01
## `Median Value of Owner-Occupied Housing Units` -1.056e-02  1.899e-03
## `Foreign Born`                                  1.176e+02  2.523e+01
## `Persons Under 18`                             -1.391e+02  5.282e+01
## Veterans                                        9.149e-02  2.480e-02
## Hispanic                                       -3.934e+01  9.977e+00
## Protestant                                     -2.186e+01  8.846e+00
## Catholic                                        2.659e+01  9.652e+00
## `Hispanic-Owned Firms`                          3.804e+01  1.620e+01
## `Manufacturers Shipments - 2007`                6.798e-05  2.974e-05
## Women                                           1.236e+01  6.169e+00
## `Black-Owned Firms`                            -2.780e+01  1.427e+01
## Income                                         -8.685e-02  4.101e-02
## `Retail Sales - 2007`                          -2.510e-04  1.454e-04
## Jewish                                          6.189e+02  3.626e+02
##                                                t value Pr(>|t|)    
## (Intercept)                                     -1.422 0.155084    
## votes_gop_2012                                  25.160  < 2e-16 ***
## `Private Nonfarm Employment`                   -25.294  < 2e-16 ***
## `Housing Units 2014`                             7.380 2.03e-13 ***
## Density                                          5.852 5.36e-09 ***
## population2014                                 -13.004  < 2e-16 ***
## `Private Nonfarm Establishments 2013`           13.301  < 2e-16 ***
## population2010                                  12.729  < 2e-16 ***
## total_votes_2012                               -17.459  < 2e-16 ***
## votes_dem_2012                                  17.019  < 2e-16 ***
## `Total Number of Firms`                         -7.086 1.70e-12 ***
## `Accommodation and Food Service Sales - 2007`    9.831  < 2e-16 ***
## `Building Permits`                               6.933 5.02e-12 ***
## Orthodox                                         4.745 2.18e-06 ***
## Black                                           -3.854 0.000119 ***
## Mormon                                          -2.430 0.015148 *  
## population_change                                6.564 6.13e-11 ***
## `Persons Under 5`                               -3.485 0.000499 ***
## Edu_batchelors                                  -6.260 4.38e-10 ***
## Romney                                          -3.368 0.000767 ***
## `Median Household Income`                        5.681 1.46e-08 ***
## `% Female 2014`                                  5.920 3.57e-09 ***
## age65plus                                       -2.425 0.015376 *  
## `Median Value of Owner-Occupied Housing Units`  -5.560 2.93e-08 ***
## `Foreign Born`                                   4.663 3.25e-06 ***
## `Persons Under 18`                              -2.633 0.008511 ** 
## Veterans                                         3.689 0.000229 ***
## Hispanic                                        -3.943 8.22e-05 ***
## Protestant                                      -2.471 0.013531 *  
## Catholic                                         2.755 0.005904 ** 
## `Hispanic-Owned Firms`                           2.349 0.018903 *  
## `Manufacturers Shipments - 2007`                 2.286 0.022331 *  
## Women                                            2.004 0.045186 *  
## `Black-Owned Firms`                             -1.947 0.051589 .  
## Income                                          -2.118 0.034252 *  
## `Retail Sales - 2007`                           -1.726 0.084409 .  
## Jewish                                           1.707 0.087898 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3768 on 3072 degrees of freedom
## Multiple R-squared:  0.9914, Adjusted R-squared:  0.9913 
## F-statistic:  9828 on 36 and 3072 DF,  p-value: < 2.2e-16
# Roll the county-level predicted vote counts up to state totals, label each
# state's predicted winner, and compute the predicted GOP margin in percent.
#
# Columns of `votes` are addressed by position, as elsewhere in this script:
#   votes[, 12]  -- state key, matched against the third column of
#                   state_predict (states[, 11])
#   votes[, 109] -- added to dem_votes; presumably Clinton_Votes_Predict
#   votes[, 110] -- added to gop_votes; presumably Trump_Votes_Predict
#                   (TODO confirm both positions against colnames(votes))
#
# The aggregation is vectorized rather than looping over every county/state
# pair. Keys are compared as character, so factor columns with different level
# sets and NA keys no longer raise an error; counties with an NA key or an NA
# prediction are simply left out of the totals.
state_predict <- data.frame(states[, c(1, 10, 11)])

county_state <- as.character(votes[, 12])
county_dem <- votes[, 109]
county_gop <- votes[, 110]
usable <- !is.na(county_state) & !is.na(county_dem) & !is.na(county_gop)

gop_by_state <- tapply(county_gop[usable], county_state[usable], sum)
dem_by_state <- tapply(county_dem[usable], county_state[usable], sum)

# Look up each state's totals; states with no usable county keep a total of 0.
state_row <- match(as.character(state_predict[, 3]), names(gop_by_state))
gop_total <- as.vector(gop_by_state[state_row])
dem_total <- as.vector(dem_by_state[state_row])
gop_total[is.na(state_row)] <- 0
dem_total[is.na(state_row)] <- 0

# Column order matters: later code reads columns 2 and 7 by position.
state_predict$gop_votes <- gop_total
state_predict$dem_votes <- dem_total
state_predict$winner <- ifelse(
  gop_total > dem_total, "TRUMP",
  ifelse(gop_total < dem_total, "CLINTON", "TIE")
)

# Rename to the region/value pair expected by StateChoropleth.
colnames(state_predict)[2] <- "region"
colnames(state_predict)[6] <- "value"

# Signed GOP margin as a percentage of the two-party predicted vote
# (NaN for a state whose two totals are both 0).
state_predict$gop_margin <- (
  (state_predict$gop_votes - state_predict$dem_votes) /
    (state_predict$gop_votes + state_predict$dem_votes)
) * 100

# State choropleth of the model's predicted winner.
# The fill colours are named so that each label always gets its own colour.
# An unnamed vector is matched by position to whichever labels are present,
# so a "TIE" state would take the second colour and shift "TRUMP" off red.
c <- StateChoropleth$new(state_predict)
c$title <- "2016 Winner Predicted by Model"
c$add_state_outline <- TRUE
c$legend <- "Winner"
c$set_num_colors(3)
c$ggplot_scale <- scale_fill_manual(
  values = c("CLINTON" = "blue", "TRUMP" = "red", "TIE" = "white")
)
state_predict2 <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
state_predict2

# State choropleth of the predicted GOP margin (column 7 of state_predict),
# binned into seven intervals running from dark blue to dark red.
# NOTE(review): the title below is identical to the winner map's title even
# though this map shows the margin -- confirm whether that is intended.
state_margin <- setNames(state_predict[, c(2, 7)], c("region", "value"))
state_margin$value <- cut(
  state_margin$value,
  breaks = c(-100, -10, -5, -1, 1, 5, 10, 100)
)

c <- StateChoropleth$new(state_margin)
c$title <- "2016 Winner Predicted by Model"
c$legend <- "Model Predicted Win Margin"
c$add_state_outline <- TRUE
c$set_num_colors(7)
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
state_predict3 <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
state_predict3

Correlation Analysis

library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.3
# Pairwise correlations between the candidates' columns and (a) demographic
# columns, (b) religion columns. Only rows complete across the selected
# columns are used for each matrix.
corr_subset <- votes[, c(
  "Trump", "Clinton", "Romney", "Obama", "population_change", "White",
  "Black", "Hispanic", "Income", "Edu_highschool", "Edu_batchelors"
)]
corr_subset_religion <- votes[, c(
  "Trump", "Clinton", "Romney", "Obama", "Evangelical", "Protestant",
  "Catholic", "Jewish", "Mormon", "Christian"
)]

correlation <- cor(corr_subset, use = "complete.obs")
correlation_religion <- cor(corr_subset_religion, use = "complete.obs")


# Shaded correlation plot of the demographic matrix with coefficients printed
# in each cell. `cl.pos = "n"` hides the colour legend; the former
# `addcolorlabel = "no"` is not a corrplot argument in the installed version
# and was passed through to the graphics calls, producing warnings.
demographics <- corrplot(
  correlation,
  method = "shade",
  shade.col = NA,
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  cl.pos = "n"
)
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt
## = tl.srt, : "addcolorlabel" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col
## = tl.col, : "addcolorlabel" is not a graphical parameter
## Warning in title(title, ...): "addcolorlabel" is not a graphical parameter

# Shaded correlation plot of the religion matrix with coefficients printed in
# each cell. `cl.pos = "n"` hides the colour legend; the former
# `addcolorlabel = "no"` is not a corrplot argument in the installed version
# and was passed through to the graphics calls, producing warnings.
# NOTE: this assignment overwrites the `religion` data frame read from
# religion.csv at the top of the file.
religion <- corrplot(
  correlation_religion,
  method = "shade",
  shade.col = NA,
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  cl.pos = "n"
)
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt
## = tl.srt, : "addcolorlabel" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col
## = tl.col, : "addcolorlabel" is not a graphical parameter
## Warning in title(title, ...): "addcolorlabel" is not a graphical parameter

# Print the value returned by corrplot() for the demographic correlations.
demographics
##                         Trump      Clinton       Romney       Obama
## Trump              1.00000000 -0.983706435  0.934274000 -0.93434018
## Clinton           -0.98370644  1.000000000 -0.941509781  0.94669692
## Romney             0.93427400 -0.941509781  1.000000000 -0.99813021
## Obama             -0.93434018  0.946696924 -0.998130214  1.00000000
## population_change -0.14387402  0.116002093 -0.005088279  0.00512069
## White              0.52966212 -0.593048979  0.478312388 -0.50034927
## Black             -0.42517294  0.509268989 -0.390921003  0.41515417
## Hispanic          -0.18842304  0.182939797 -0.079660815  0.08122373
## Income            -0.23666574  0.197489495 -0.129504155  0.12250369
## Edu_highschool    -0.09011927  0.007043676 -0.049315225  0.03366712
## Edu_batchelors    -0.48724875  0.434224236 -0.300027325  0.29265640
##                   population_change       White       Black    Hispanic
## Trump                  -0.143874022  0.52966212 -0.42517294 -0.18842304
## Clinton                 0.116002093 -0.59304898  0.50926899  0.18293980
## Romney                 -0.005088279  0.47831239 -0.39092100 -0.07966081
## Obama                   0.005120690 -0.50034927  0.41515417  0.08122373
## population_change       1.000000000 -0.01033989 -0.09887615  0.17060996
## White                  -0.010339887  1.00000000 -0.87293276  0.04397709
## Black                  -0.098876154 -0.87293276  1.00000000 -0.09528965
## Hispanic                0.170609962  0.04397709 -0.09528965  1.00000000
## Income                  0.383692512  0.20182088 -0.23822097 -0.03902452
## Edu_highschool          0.206248751  0.29455104 -0.35619057 -0.38413600
## Edu_batchelors          0.435806228 -0.00106647 -0.08473178  0.01336191
##                        Income Edu_highschool Edu_batchelors
## Trump             -0.23666574   -0.090119273    -0.48724875
## Clinton            0.19748949    0.007043676     0.43422424
## Romney            -0.12950415   -0.049315225    -0.30002733
## Obama              0.12250369    0.033667123     0.29265640
## population_change  0.38369251    0.206248751     0.43580623
## White              0.20182088    0.294551044    -0.00106647
## Black             -0.23822097   -0.356190573    -0.08473178
## Hispanic          -0.03902452   -0.384136004     0.01336191
## Income             1.00000000    0.642988316     0.78062981
## Edu_highschool     0.64298832    1.000000000     0.60138653
## Edu_batchelors     0.78062981    0.601386528     1.00000000
# Print the value returned by corrplot() for the religion correlations.
religion
##                   Trump     Clinton      Romney       Obama Evangelical
## Trump        1.00000000 -0.98369258  0.93423719 -0.93432101  0.23433745
## Clinton     -0.98369258  1.00000000 -0.94144501  0.94664018 -0.14437003
## Romney       0.93423719 -0.94144501  1.00000000 -0.99814090  0.21845482
## Obama       -0.93432101  0.94664018 -0.99814090  1.00000000 -0.19535489
## Evangelical  0.23433745 -0.14437003  0.21845482 -0.19535489  1.00000000
## Protestant   0.13208925 -0.14555522  0.04995778 -0.05100575 -0.04920175
## Catholic    -0.15322308  0.13026914 -0.17466183  0.17006743 -0.37589364
## Jewish      -0.33262451  0.33792807 -0.27718221  0.27882978 -0.17206418
## Mormon       0.01461194 -0.09757169  0.12890155 -0.13630309 -0.15133860
## Christian    0.14550766 -0.08575101  0.08054665 -0.06302473  0.62757409
##              Protestant    Catholic      Jewish      Mormon   Christian
## Trump        0.13208925 -0.15322308 -0.33262451  0.01461194  0.14550766
## Clinton     -0.14555522  0.13026914  0.33792807 -0.09757169 -0.08575101
## Romney       0.04995778 -0.17466183 -0.27718221  0.12890155  0.08054665
## Obama       -0.05100575  0.17006743  0.27882978 -0.13630309 -0.06302473
## Evangelical -0.04920175 -0.37589364 -0.17206418 -0.15133860  0.62757409
## Protestant   1.00000000  0.22831091 -0.06893755 -0.11001702  0.60529619
## Catholic     0.22831091  1.00000000  0.13279224 -0.06883001  0.29035959
## Jewish      -0.06893755  0.13279224  1.00000000 -0.02128366 -0.10046100
## Mormon      -0.11001702 -0.06883001 -0.02128366  1.00000000 -0.21864417
## Christian    0.60529619  0.29035959 -0.10046100 -0.21864417  1.00000000

Most Important Feature Analysis

Clinton

library(h2o)
## Warning: package 'h2o' was built under R version 3.3.3
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following object is masked from 'package:acs':
## 
##     apply
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
# Connect to a local H2O cluster (starting one if none is running).
# nthreads = -1 asks H2O to use all available cores; max_mem_size requests
# 6G of memory for a newly started cluster.
h2o.init(nthreads=-1,max_mem_size='6G')
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         1 hours 10 minutes 
##     H2O cluster version:        3.10.4.6 
##     H2O cluster version age:    2 months and 7 days  
##     H2O cluster name:           H2O_started_from_R_onest_xjv369 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   4.95 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  4 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 3.3.2 (2016-10-31)
# Upload the Predict_Clinton data frame (built outside this section) to the
# H2O cluster so it can be used as a training frame.
predict.Clinton = as.h2o(Predict_Clinton)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Random forest used to rank features for Clinton: the first column of the
# H2O frame is the response and columns 2 through 59 are the predictors.
# Trained with 10-fold cross-validation, scoring after every tree, and early
# stopping (stopping_rounds = 2), so fewer than 200 trees may be built.
vars.Clinton <- colnames(predict.Clinton)
y_var.Clinton <- vars.Clinton[1]
x_vars.Clinton <- vars.Clinton[2:59]

Clinton_features <- h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = predict.Clinton,
  nfolds = 10,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=================================================================| 100%
# Print the H2O model summary for the Clinton random forest: model details,
# training and cross-validation metrics, scoring history, and variable
# importances.
summary(Clinton_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_13 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              21                       21              506920        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1861       1975  1914.47620
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  6.967564
## RMSE:  2.639614
## MAE:  1.987007
## RMSLE:  0.09819153
## Mean Residual Deviance :  6.967564
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  6.039764
## RMSE:  2.457593
## MAE:  1.84011
## RMSLE:  0.09185895
## Mean Residual Deviance :  6.039764
## 
## 
## Cross-Validation Metrics Summary: 
##                          mean           sd  cv_1_valid  cv_2_valid
## mae                 1.8428727   0.07523045   1.8915086   1.7472016
## mse                 6.0550523   0.55579835    6.005443   5.3264985
## r2                  0.9741264  0.002257016  0.97279286   0.9773105
## residual_deviance   6.0550523   0.55579835    6.005443   5.3264985
## rmse                2.4556847   0.11105085   2.4506006   2.3079207
## rmsle             0.091426335 0.0060151466 0.093521066 0.091158554
##                   cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae                1.8171152  1.8222564  1.7392036  1.6625148  1.9629086
## mse                 5.776114  5.5907035   5.639608   5.032604  6.8387527
## r2                 0.9789275 0.97289354 0.97356176 0.97540796   0.970742
## residual_deviance   5.776114  5.5907035   5.639608   5.032604  6.8387527
## rmse               2.4033546   2.364467   2.374786  2.2433467  2.6151009
## rmsle             0.08283377 0.07795417 0.08026096  0.0945025 0.09372223
##                   cv_8_valid cv_9_valid cv_10_valid
## mae                1.8111181   1.980861   1.9940398
## mse                 5.645194    7.41484   7.2807646
## r2                 0.9784077  0.9684981   0.9727218
## residual_deviance   5.645194    7.41484   7.2807646
## rmse               2.3759618  2.7230203   2.6982892
## rmsle             0.10725975 0.09982335  0.09322696
## 
## Scoring History: 
##             timestamp   duration number_of_trees training_rmse
## 1 2017-07-04 13:50:18 14.166 sec               0              
## 2 2017-07-04 13:50:18 14.258 sec               1       4.56398
## 3 2017-07-04 13:50:18 14.356 sec               2       4.18082
## 4 2017-07-04 13:50:19 14.449 sec               3       3.94459
## 5 2017-07-04 13:50:19 14.542 sec               4       3.79623
##   training_mae training_deviance
## 1                               
## 2      3.24738          20.82995
## 3      2.99978          17.47927
## 4      2.87190          15.55976
## 5      2.79030          14.41133
## 
## ---
##              timestamp   duration number_of_trees training_rmse
## 17 2017-07-04 13:50:20 15.567 sec              16       2.74439
## 18 2017-07-04 13:50:20 15.646 sec              17       2.71752
## 19 2017-07-04 13:50:20 15.728 sec              18       2.69444
## 20 2017-07-04 13:50:20 15.807 sec              19       2.65668
## 21 2017-07-04 13:50:20 15.885 sec              20       2.64683
## 22 2017-07-04 13:50:20 15.960 sec              21       2.63961
##    training_mae training_deviance
## 17      2.06025           7.53168
## 18      2.04409           7.38492
## 19      2.03443           7.26000
## 20      2.00742           7.05795
## 21      1.99804           7.00569
## 22      1.98701           6.96756
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                      variable relative_importance
## 1                                       Obama      4839851.500000
## 2                                      Romney      3895734.000000
## 3 Accommodation and Food Service Sales - 2007       643884.750000
## 4      Housing Units in Multi-Unit Structures       403044.406250
## 5                                       Black       374463.468750
##   scaled_importance percentage
## 1          1.000000   0.399807
## 2          0.804928   0.321816
## 3          0.133038   0.053190
## 4          0.083276   0.033294
## 5          0.077371   0.030933
## 
## ---
##                          variable relative_importance scaled_importance
## 53                         Mormon         4044.271240          0.000836
## 54 Manufacturers Shipments - 2007         2623.896729          0.000542
## 55                       Orthodox         2301.390381          0.000476
## 56                        Islamic         1621.955444          0.000335
## 57                       Buddhist          752.422119          0.000155
## 58                          Hindu          564.204590          0.000117
##    percentage
## 53   0.000334
## 54   0.000217
## 55   0.000190
## 56   0.000134
## 57   0.000062
## 58   0.000047
# Show the first 20 rows of the Clinton model's variable-importance table.
h2o.varimp(Clinton_features)[1:20,]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama      4839851.500000
## 2                                        Romney      3895734.000000
## 3   Accommodation and Food Service Sales - 2007       643884.750000
## 4        Housing Units in Multi-Unit Structures       403044.406250
## 5                                         Black       374463.468750
## 6                                         White       316123.406250
## 7                    Private Nonfarm Employment       311307.937500
## 8                             Black-Owned Firms       212851.578125
## 9                                       Density       189867.125000
## 10                                 Foreign Born        97862.742188
## 11                               Edu_batchelors        89795.242188
## 12 Median Value of Owner-Occupied Housing Units        61838.433594
## 13                           Historically_Black        60181.757812
## 14                                   NonEnglish        46723.226562
## 15                        Total Number of Firms        43642.250000
## 16                           Homeownership Rate        33886.480469
## 17                               population2010        30759.291016
## 18                                   Households        22673.718750
## 19                            Persons/Household        22604.105469
## 20                            population_change        21853.570312
##    scaled_importance percentage
## 1           1.000000   0.399807
## 2           0.804928   0.321816
## 3           0.133038   0.053190
## 4           0.083276   0.033294
## 5           0.077371   0.030933
## 6           0.065317   0.026114
## 7           0.064322   0.025716
## 8           0.043979   0.017583
## 9           0.039230   0.015684
## 10          0.020220   0.008084
## 11          0.018553   0.007418
## 12          0.012777   0.005108
## 13          0.012435   0.004971
## 14          0.009654   0.003860
## 15          0.009017   0.003605
## 16          0.007002   0.002799
## 17          0.006355   0.002541
## 18          0.004685   0.001873
## 19          0.004670   0.001867
## 20          0.004515   0.001805
# Plot the variable importances of the Clinton model, limited to 20 features.
h2o.varimp_plot(Clinton_features, num_of_features = 20)

Trump

# Convert the Trump modelling data frame into an H2O frame for training.
predict.Trump <- as.h2o(x = Predict_Trump)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# The first column of the H2O frame is the response; columns 2-59 are the
# predictors.
vars.Trump <- colnames(predict.Trump)
y_var.Trump <- vars.Trump[1]
x_vars.Trump <- vars.Trump[2:59]

# Random forest used for feature ranking: at most 200 trees with early
# stopping (stopping_rounds = 2, scored after every tree), 10-fold
# cross-validation, and a fixed seed for reproducibility.
Trump_features <- h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = predict.Trump,
  nfolds = 10,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |=================================================================| 100%
# Print model details, training / cross-validation metrics, scoring history
# and variable importances for the Trump random forest.
summary(object = Trump_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_14 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              18                       18              432638        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1844       1938  1906.88890
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  9.19168
## RMSE:  3.031778
## MAE:  2.273826
## RMSLE:  0.0644703
## Mean Residual Deviance :  9.19168
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  8.099768
## RMSE:  2.846009
## MAE:  2.0923
## RMSLE:  0.06425957
## Mean Residual Deviance :  8.099768
## 
## 
## Cross-Validation Metrics Summary: 
##                         mean           sd  cv_1_valid  cv_2_valid
## mae                2.0925953   0.08323233    2.084904   1.9466763
## mse                 8.099447    0.7257394    7.811906    6.649725
## r2                0.96660876 0.0033785722    0.966138   0.9721209
## residual_deviance   8.099447    0.7257394    7.811906    6.649725
## rmse               2.8401918   0.12798046   2.7949786    2.578706
## rmsle             0.06314858  0.009248693 0.055708043 0.050057102
##                    cv_3_valid cv_4_valid cv_5_valid  cv_6_valid
## mae                 1.9816947  1.9424402  1.9840581    2.267307
## mse                  7.452567  6.5643163   7.939525    9.761651
## r2                 0.97353655   0.969534 0.96519685  0.95566523
## residual_deviance    7.452567  6.5643163   7.939525    9.761651
## rmse                 2.729939   2.562092  2.8177164   3.1243641
## rmsle             0.055192083 0.05458503 0.07344787 0.060759306
##                    cv_7_valid  cv_8_valid cv_9_valid cv_10_valid
## mae                  2.107805    2.161643    2.21629   2.2331357
## mse                  7.728803    9.073089   8.840077    9.172812
## r2                  0.9676459   0.9672588  0.9623796  0.96661174
## residual_deviance    7.728803    9.073089   8.840077    9.172812
## rmse                2.7800725   3.0121567  2.9732268    3.028665
## rmsle             0.061720204 0.058766253   0.063254  0.09799592
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:50:34 12.464 sec               0              
## 2  2017-07-04 13:50:34 12.543 sec               1       4.68151
## 3  2017-07-04 13:50:35 12.626 sec               2       4.46339
## 4  2017-07-04 13:50:35 12.702 sec               3       4.18316
## 5  2017-07-04 13:50:35 12.776 sec               4       4.16084
## 6  2017-07-04 13:50:35 12.854 sec               5       3.93286
## 7  2017-07-04 13:50:35 12.933 sec               6       3.75325
## 8  2017-07-04 13:50:35 13.008 sec               7       3.64086
## 9  2017-07-04 13:50:35 13.084 sec               8       3.58072
## 10 2017-07-04 13:50:35 13.162 sec               9       3.52243
## 11 2017-07-04 13:50:35 13.236 sec              10       3.44681
## 12 2017-07-04 13:50:35 13.315 sec              11       3.37018
## 13 2017-07-04 13:50:35 13.397 sec              12       3.27858
## 14 2017-07-04 13:50:35 13.476 sec              13       3.22463
## 15 2017-07-04 13:50:35 13.554 sec              14       3.17985
## 16 2017-07-04 13:50:36 13.629 sec              15       3.13531
## 17 2017-07-04 13:50:36 13.705 sec              16       3.09054
## 18 2017-07-04 13:50:36 13.784 sec              17       3.05684
## 19 2017-07-04 13:50:36 13.861 sec              18       3.03178
##    training_mae training_deviance
## 1                                
## 2       3.50252          21.91649
## 3       3.33913          19.92182
## 4       3.13500          17.49880
## 5       3.05750          17.31255
## 6       2.91188          15.46737
## 7       2.78487          14.08689
## 8       2.70936          13.25584
## 9       2.66056          12.82155
## 10      2.61323          12.40750
## 11      2.56129          11.88051
## 12      2.51101          11.35811
## 13      2.44729          10.74907
## 14      2.40585          10.39823
## 15      2.35768          10.11145
## 16      2.34510           9.83017
## 17      2.31388           9.55142
## 18      2.29450           9.34427
## 19      2.27383           9.19168
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                      variable relative_importance
## 1                                      Romney      5155287.000000
## 2                                       Obama      2963044.500000
## 3      Housing Units in Multi-Unit Structures       528261.750000
## 4 Accommodation and Food Service Sales - 2007       367348.062500
## 5                              Edu_batchelors       318496.687500
##   scaled_importance percentage
## 1          1.000000   0.471791
## 2          0.574758   0.271166
## 3          0.102470   0.048344
## 4          0.071257   0.033618
## 5          0.061781   0.029148
## 
## ---
##                          variable relative_importance scaled_importance
## 53 Manufacturers Shipments - 2007         4126.576660          0.000800
## 54                       Orthodox         1809.172729          0.000351
## 55                         Jewish         1600.463379          0.000310
## 56                        Islamic         1317.801880          0.000256
## 57                       Buddhist          751.266907          0.000146
## 58                          Hindu           88.646675          0.000017
##    percentage
## 53   0.000378
## 54   0.000166
## 55   0.000146
## 56   0.000121
## 57   0.000069
## 58   0.000008
# Show the 20 most important predictors of the Trump random forest.
h2o.varimp(Trump_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                        Romney      5155287.000000
## 2                                         Obama      2963044.500000
## 3        Housing Units in Multi-Unit Structures       528261.750000
## 4   Accommodation and Food Service Sales - 2007       367348.062500
## 5                                Edu_batchelors       318496.687500
## 6                                         Black       273496.593750
## 7           Private Nonfarm Establishments 2013       187035.359375
## 8                    Private Nonfarm Employment       159617.578125
## 9  Median Value of Owner-Occupied Housing Units        93521.976562
## 10                                        White        83486.484375
## 11                                 Foreign Born        82496.109375
## 12                            Black-Owned Firms        54077.496094
## 13                                  Evangelical        53287.820312
## 14                                   NonEnglish        47489.898438
## 15                           Historically_Black        36577.964844
## 16                               population2010        29904.724609
## 17                            population_change        28675.873047
## 18                            Persons/Household        25468.931641
## 19                                     Hispanic        25454.554688
## 20                           Housing Units 2014        24591.384766
##    scaled_importance percentage
## 1           1.000000   0.471791
## 2           0.574758   0.271166
## 3           0.102470   0.048344
## 4           0.071257   0.033618
## 5           0.061781   0.029148
## 6           0.053052   0.025029
## 7           0.036280   0.017117
## 8           0.030962   0.014608
## 9           0.018141   0.008559
## 10          0.016194   0.007640
## 11          0.016002   0.007550
## 12          0.010490   0.004949
## 13          0.010337   0.004877
## 14          0.009212   0.004346
## 15          0.007095   0.003347
## 16          0.005801   0.002737
## 17          0.005562   0.002624
## 18          0.004940   0.002331
## 19          0.004938   0.002329
## 20          0.004770   0.002251
# Plot the 20 highest-ranked variable importances of the Trump random forest.
h2o.varimp_plot(
  model = Trump_features,
  num_of_features = 20
)

Swing Predict

# Convert the swing (vote-shift) modelling data frame into an H2O frame.
predict.Swing <- as.h2o(x = Overall_Dev_Predict)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 61 of the H2O frame is the response; every other column among the
# first 62 (1-60 and 62) is a predictor.
vars.Swing <- colnames(predict.Swing)
y_var.Swing <- vars.Swing[61]
x_vars.Swing <- vars.Swing[c(1:60, 62)]

# Random forest used for feature ranking: at most 200 trees with early
# stopping (stopping_rounds = 2, scored after every tree), 10-fold
# cross-validation, and a fixed seed for reproducibility.
Swing_features <- h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = predict.Swing,
  nfolds = 10,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |===========================================                      |  65%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |=================================================================| 100%
# Print model details, training / cross-validation metrics, scoring history
# and variable importances for the swing random forest.
summary(object = Swing_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_15 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              20                       20              482482        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1866       1988  1912.80000
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  23.38624
## RMSE:  4.835932
## MAE:  3.640499
## RMSLE:  NaN
## Mean Residual Deviance :  23.38624
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  20.82928
## RMSE:  4.563911
## MAE:  3.454607
## RMSLE:  NaN
## Mean Residual Deviance :  20.82928
## 
## 
## Cross-Validation Metrics Summary: 
##                        mean          sd cv_1_valid cv_2_valid cv_3_valid
## mae                3.456276  0.11210502  3.5003939  3.2188752  3.1686027
## mse               20.833815   1.5101509   21.30881  18.320068   17.43827
## r2                0.7998607 0.015021347  0.7985061 0.79740304  0.8198827
## residual_deviance 20.833815   1.5101509   21.30881  18.320068   17.43827
## rmse                4.55834  0.16635615  4.6161466  4.2801948  4.1759157
## rmsle                   0.0         NaN        NaN        NaN        NaN
##                   cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                3.3518422   3.576076   3.601392   3.483406  3.4794314
## mse                 18.83481  22.753336  23.270376  21.153341  22.000587
## r2                 0.8184314 0.77076167  0.7945231    0.77902  0.8145726
## residual_deviance   18.83481  22.753336  23.270376  21.153341  22.000587
## rmse                4.339909  4.7700458   4.823938   4.599276  4.6904783
## rmsle                    NaN        NaN        NaN        NaN        NaN
##                   cv_9_valid cv_10_valid
## mae                3.7058234   3.4769158
## mse                23.994698   19.263847
## r2                0.76946336  0.83604324
## residual_deviance  23.994698   19.263847
## rmse               4.8984385     4.38906
## rmsle                    NaN         NaN
## 
## Scoring History: 
##             timestamp   duration number_of_trees training_rmse
## 1 2017-07-04 13:50:52 14.316 sec               0              
## 2 2017-07-04 13:50:52 14.401 sec               1       6.80282
## 3 2017-07-04 13:50:52 14.485 sec               2       6.73403
## 4 2017-07-04 13:50:52 14.571 sec               3       6.47733
## 5 2017-07-04 13:50:52 14.654 sec               4       6.35175
##   training_mae training_deviance
## 1                               
## 2      5.16590          46.27842
## 3      5.04244          45.34712
## 4      4.85260          41.95584
## 5      4.70634          40.34476
## 
## ---
##              timestamp   duration number_of_trees training_rmse
## 16 2017-07-04 13:50:53 15.565 sec              15       4.98087
## 17 2017-07-04 13:50:53 15.648 sec              16       4.95881
## 18 2017-07-04 13:50:53 15.728 sec              17       4.93672
## 19 2017-07-04 13:50:53 15.816 sec              18       4.89476
## 20 2017-07-04 13:50:53 15.896 sec              19       4.86762
## 21 2017-07-04 13:50:54 15.978 sec              20       4.83593
##    training_mae training_deviance
## 16      3.74797          24.80909
## 17      3.73866          24.58977
## 18      3.71992          24.37123
## 19      3.69023          23.95865
## 20      3.66254          23.69370
## 21      3.64050          23.38624
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##            variable relative_importance scaled_importance percentage
## 1      Foreign Born       780725.812500          1.000000   0.168096
## 2    Edu_batchelors       767971.062500          0.983663   0.165350
## 3            Romney       293125.968750          0.375453   0.063112
## 4             Obama       287345.687500          0.368049   0.061868
## 5 population_change       264071.625000          0.338239   0.056857
## 
## ---
##          variable relative_importance scaled_importance percentage
## 56 population2010         6572.494629          0.008418   0.001415
## 57       Orthodox         5954.307129          0.007627   0.001282
## 58         Jewish         3808.336426          0.004878   0.000820
## 59       Buddhist         3445.218506          0.004413   0.000742
## 60        Islamic         2736.301758          0.003505   0.000589
## 61          Hindu         1201.396729          0.001539   0.000259
# Show the 20 most important predictors of the swing random forest.
h2o.varimp(Swing_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                  Foreign Born       780725.812500
## 2                                Edu_batchelors       767971.062500
## 3                                        Romney       293125.968750
## 4                                         Obama       287345.687500
## 5                             population_change       264071.625000
## 6                                     age65plus       191607.312500
## 7                              Building Permits       176082.515625
## 8                                         Black       170653.609375
## 9                                         White       143307.312500
## 10                                   NonEnglish       135277.156250
## 11                            Persons/Household        91002.992188
## 12 Median Value of Owner-Occupied Housing Units        88411.570312
## 13                                     Hispanic        88308.492188
## 14                                       Mormon        74074.476562
## 15                                  Evangelical        68271.750000
## 16                                    Christian        66716.500000
## 17                           Historically_Black        65353.214844
## 18                                   Protestant        53810.617188
## 19                             Persons Under 18        49755.976562
## 20                                       Income        45096.437500
##    scaled_importance percentage
## 1           1.000000   0.168096
## 2           0.983663   0.165350
## 3           0.375453   0.063112
## 4           0.368049   0.061868
## 5           0.338239   0.056857
## 6           0.245422   0.041254
## 7           0.225537   0.037912
## 8           0.218583   0.036743
## 9           0.183557   0.030855
## 10          0.173271   0.029126
## 11          0.116562   0.019594
## 12          0.113243   0.019036
## 13          0.113111   0.019013
## 14          0.094879   0.015949
## 15          0.087447   0.014699
## 16          0.085454   0.014365
## 17          0.083708   0.014071
## 18          0.068924   0.011586
## 19          0.063730   0.010713
## 20          0.057762   0.009710
# Plot the 20 highest-ranked variable importances of the swing random forest.
h2o.varimp_plot(
  model = Swing_features,
  num_of_features = 20
)

Correlation of Most important swing variables

# Pairwise correlations between the 2016 / 2012 results and the top-ranked
# swing predictors. Columns are selected by position in `votes`; rows with any
# missing value are excluded from the correlation ("complete.obs").
corr_subset_swing <- votes[, c(8, 9, 43, 46, 20, 19, 28, 32, 74, 35, 34, 44, 54)]

correlation_swing <- cor(corr_subset_swing, use = "complete.obs")

# Shaded correlation plot with coefficients printed in each cell.
# `cl.pos = "n"` suppresses the colour legend. The former
# `addcolorlabel = "no"` is not a corrplot() argument in this version: it was
# forwarded through `...` to text()/title(), which only produced
# '"addcolorlabel" is not a graphical parameter' warnings and left the legend
# in place.
swing <- corrplot(
  correlation_swing,
  method = "shade",
  shade.col = NA,
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  cl.pos = "n"
)

# Display the value returned by corrplot() (printed below as the
# correlation matrix).
print(swing)
##                      Clinton      Trump Foreign Born Edu_batchelors
## Clinton            1.0000000 -0.9837064  0.391563486     0.43422424
## Trump             -0.9837064  1.0000000 -0.395191097    -0.48724875
## Foreign Born       0.3915635 -0.3951911  1.000000000     0.36658290
## Edu_batchelors     0.4342242 -0.4872487  0.366582898     1.00000000
## Romney            -0.9415098  0.9342740 -0.242001812    -0.30002733
## Obama              0.9466969 -0.9343402  0.244280092     0.29265640
## population_change  0.1160021 -0.1438740  0.316927722     0.43580623
## age65plus         -0.3078179  0.3230607 -0.344869443    -0.23498620
## Building Permits   0.2819284 -0.2868319  0.420037480     0.34895577
## Black              0.5092690 -0.4251729  0.009439359    -0.08473178
## White             -0.5930490  0.5296621 -0.111214215    -0.00106647
## NonEnglish         0.3203200 -0.3264593  0.822902882     0.15263309
## Persons/Household  0.1655445 -0.1705746  0.411876494    -0.06228232
##                         Romney       Obama population_change  age65plus
## Clinton           -0.941509781  0.94669692       0.116002093 -0.3078179
## Trump              0.934274000 -0.93434018      -0.143874022  0.3230607
## Foreign Born      -0.242001812  0.24428009       0.316927722 -0.3448694
## Edu_batchelors    -0.300027325  0.29265640       0.435806228 -0.2349862
## Romney             1.000000000 -0.99813021      -0.005088279  0.2017157
## Obama             -0.998130214  1.00000000       0.005120690 -0.2084752
## population_change -0.005088279  0.00512069       1.000000000 -0.4145400
## age65plus          0.201715712 -0.20847523      -0.414539997  1.0000000
## Building Permits  -0.186745154  0.18824115       0.334357078 -0.2307868
## Black             -0.390921003  0.41515417      -0.098876154 -0.2256728
## White              0.478312388 -0.50034927      -0.010339887  0.3126738
## NonEnglish        -0.205383341  0.20735084       0.238685010 -0.3026660
## Persons/Household -0.049336849  0.06191844       0.275078327 -0.6041703
##                   Building Permits        Black       White  NonEnglish
## Clinton                 0.28192841  0.509268989 -0.59304898  0.32031996
## Trump                  -0.28683186 -0.425172945  0.52966212 -0.32645928
## Foreign Born            0.42003748  0.009439359 -0.11121421  0.82290288
## Edu_batchelors          0.34895577 -0.084731778 -0.00106647  0.15263309
## Romney                 -0.18674515 -0.390921003  0.47831239 -0.20538334
## Obama                   0.18824115  0.415154166 -0.50034927  0.20735084
## population_change       0.33435708 -0.098876154 -0.01033989  0.23868501
## age65plus              -0.23078680 -0.225672806  0.31267378 -0.30266597
## Building Permits        1.00000000  0.066066494 -0.12919138  0.28003297
## Black                   0.06606649  1.000000000 -0.87293276 -0.07267724
## White                  -0.12919138 -0.872932756  1.00000000 -0.04805566
## NonEnglish              0.28003297 -0.072677242 -0.04805566  1.00000000
## Persons/Household       0.14228533  0.152645633 -0.30238451  0.50826960
##                   Persons/Household
## Clinton                  0.16554447
## Trump                   -0.17057457
## Foreign Born             0.41187649
## Edu_batchelors          -0.06228232
## Romney                  -0.04933685
## Obama                    0.06191844
## population_change        0.27507833
## age65plus               -0.60417034
## Building Permits         0.14228533
## Black                    0.15264563
## White                   -0.30238451
## NonEnglish               0.50826960
## Persons/Household        1.00000000

Prediction by Most Important Features

# Adjusted R-squared of nested linear models, adding one top-ranked
# random-forest feature at a time for each response (Clinton, Trump,
# per_shift). Predictors are referenced by column position in `votes`;
# going by the correlation table and model output printed elsewhere in this
# report, the positions map to:
#   20 Romney            28 population_change   32 age65plus
#   34 White             35 Black               43 Foreign Born
#   46 Edu_batchelors    51 Housing Units in Multi-Unit Structures
#   73 presumably "Accommodation and Food Service Sales - 2007" -- TODO confirm

# --- Clinton -----------------------------------------------------------------
clinton_1 <- summary(lm(Clinton ~ Obama, data = votes))$adj.r.squared
clinton_2 <- summary(lm(
  Clinton ~ Obama + votes[, 73],
  data = votes
))$adj.r.squared
clinton_3 <- summary(lm(
  Clinton ~ Obama + votes[, 73] + votes[, 51],
  data = votes
))$adj.r.squared
clinton_4 <- summary(lm(
  Clinton ~ Obama + votes[, 73] + votes[, 51] + votes[, 35],
  data = votes
))$adj.r.squared
clinton_5 <- summary(lm(
  Clinton ~ Obama + votes[, 73] + votes[, 51] + votes[, 35] + votes[, 34],
  data = votes
))$adj.r.squared

# Leading 0 is the "no features" baseline plotted at x = 0.
Clinton_rsq <- c(0, clinton_1, clinton_2, clinton_3, clinton_4, clinton_5)

# --- Trump -------------------------------------------------------------------
trump_1 <- summary(lm(Trump ~ Romney, data = votes))$adj.r.squared
trump_2 <- summary(lm(
  Trump ~ Romney + votes[, 51],
  data = votes
))$adj.r.squared
trump_3 <- summary(lm(
  Trump ~ Romney + votes[, 51] + votes[, 73],
  data = votes
))$adj.r.squared
trump_4 <- summary(lm(
  Trump ~ Romney + votes[, 51] + votes[, 73] + votes[, 46],
  data = votes
))$adj.r.squared
trump_5 <- summary(lm(
  Trump ~ Romney + votes[, 51] + votes[, 73] + votes[, 46] + votes[, 35],
  data = votes
))$adj.r.squared

Trump_rsq <- c(0, trump_1, trump_2, trump_3, trump_4, trump_5)

# --- Shift -------------------------------------------------------------------
shift_1 <- summary(lm(per_shift ~ votes[, 43], data = votes))$adj.r.squared
shift_2 <- summary(lm(
  per_shift ~ votes[, 43] + votes[, 46],
  data = votes
))$adj.r.squared
shift_3 <- summary(lm(
  per_shift ~ votes[, 43] + votes[, 46] + votes[, 20],
  data = votes
))$adj.r.squared
shift_4 <- summary(lm(
  per_shift ~ votes[, 43] + votes[, 46] + votes[, 20] + votes[, 28],
  data = votes
))$adj.r.squared
shift_5 <- summary(lm(
  per_shift ~ votes[, 43] + votes[, 46] + votes[, 20] + votes[, 28] +
    votes[, 32],
  data = votes
))$adj.r.squared

theshift <- c(0, shift_1, shift_2, shift_3, shift_4, shift_5)
num <- c(0, 1, 2, 3, 4, 5)

# Shared y-range for the three curves. Each series contains the 0 baseline, so
# theMin is 0 unless some adjusted R-squared is negative, in which case the
# axis extends downward instead of clipping the point.
theMax <- max(Clinton_rsq, Trump_rsq, theshift)
theMin <- min(Clinton_rsq, Trump_rsq, theshift)

# Draw the first series with plot(), then add the others with lines() so that
# axes, title and labels are drawn once (the "Accuracy" axis is adjusted
# R-squared).
plot(num,
     Clinton_rsq,
     col = "blue",
     type = "b",
     main = "Accuracy for Features Selected",
     xlab = "Number of features",
     ylab = "Accuracy",
     ylim = c(theMin, theMax))
lines(num, Trump_rsq, col = "red", type = "b")
lines(num, theshift, col = "black", type = "b")
legend("bottomright",
       legend = c("Clinton", "Trump", "Shift"),
       fill = c("blue", "red", "black"))

# Report the single-predictor baselines (adjusted R-squared of Clinton on
# Obama and of Trump on Romney). Both lines end with a newline so later
# console output does not run on from the last value.
cat("Clinton - Obama Variation:", clinton_1, "\n")
## Clinton - Obama Variation: 0.8962017
cat("Trump - Romney Variation:", trump_1, "\n")
## Trump - Romney Variation: 0.872827

Understanding the Swing Variables

# Linear regression of per_shift on every other column of a positional subset
# of `votes`, to read off direction and significance of each predictor.
# Note: this rebinds `Swing_features`, which earlier held the H2O random
# forest model.
theSubset <- votes[, c(19, 20, 28, 31, 32, 34, 35, 40, 43, 44, 46, 52, 54, 55,
                       74, 84, 85, 86, 89, 98, 100)]
Swing_features <- lm(formula = per_shift ~ ., data = theSubset)
summary(Swing_features)
## 
## Call:
## lm(formula = per_shift ~ ., data = theSubset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.398  -2.853  -0.068   2.912  21.873 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     3.952e+01  1.107e+01
## Obama                                           1.416e-01  1.134e-01
## Romney                                         -2.469e-01  1.130e-01
## population_change                              -2.423e-02  2.646e-02
## `Persons Under 18`                             -1.791e-01  4.392e-02
## age65plus                                      -1.088e-01  3.165e-02
## White                                           8.169e-02  1.379e-02
## Black                                          -3.004e-01  1.609e-02
## Hispanic                                       -2.064e-01  1.866e-02
## `Foreign Born`                                  4.090e-03  3.536e-02
## NonEnglish                                     -6.240e-02  2.846e-02
## Edu_batchelors                                 -8.624e-01  1.924e-02
## `Median Value of Owner-Occupied Housing Units` -2.780e-05  2.282e-06
## `Persons/Household`                            -1.980e+00  6.542e-01
## Income                                          4.023e-04  3.404e-05
## `Building Permits`                             -3.084e-04  7.782e-05
## Evangelical                                    -2.372e-03  1.394e-02
## Protestant                                      3.383e-02  1.753e-02
## Historically_Black                             -9.208e-02  6.593e-02
## Mormon                                         -2.943e-01  2.404e-02
## Christian                                       2.375e-02  1.254e-02
##                                                t value Pr(>|t|)    
## (Intercept)                                      3.571 0.000361 ***
## Obama                                            1.248 0.212091    
## Romney                                          -2.185 0.028976 *  
## population_change                               -0.916 0.359974    
## `Persons Under 18`                              -4.077 4.67e-05 ***
## age65plus                                       -3.438 0.000595 ***
## White                                            5.923 3.51e-09 ***
## Black                                          -18.663  < 2e-16 ***
## Hispanic                                       -11.058  < 2e-16 ***
## `Foreign Born`                                   0.116 0.907924    
## NonEnglish                                      -2.192 0.028446 *  
## Edu_batchelors                                 -44.827  < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units` -12.180  < 2e-16 ***
## `Persons/Household`                             -3.026 0.002500 ** 
## Income                                          11.815  < 2e-16 ***
## `Building Permits`                              -3.963 7.56e-05 ***
## Evangelical                                     -0.170 0.864903    
## Protestant                                       1.930 0.053674 .  
## Historically_Black                              -1.397 0.162639    
## Mormon                                         -12.240  < 2e-16 ***
## Christian                                        1.894 0.058354 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.821 on 3088 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:   0.78,  Adjusted R-squared:  0.7785 
## F-statistic: 547.3 on 20 and 3088 DF,  p-value: < 2.2e-16

My Model

# Hand-picked predictor subset (selected by column position in `votes`) plus
# the per_shift response; rows containing any NA are dropped before fitting.
my_subset = votes[,c(28,32,34,40,46,51,52,55,74,84,89,98,100)]
my_subset = na.omit(my_subset)
# Interaction term added as an extra predictor.
# NOTE(review): the column is named IncomeXPopChange but is computed as
# population_change * Edu_batchelors -- Income is not involved. Confirm whether
# the name or the formula reflects the intent.
my_subset$IncomeXPopChange = my_subset$population_change * my_subset$Edu_batchelors
# Regress per_shift on all remaining columns of my_subset and print the fit.
my_model = lm(per_shift ~ .,data=my_subset)
summary(my_model)
## 
## Call:
## lm(formula = per_shift ~ ., data = my_subset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.089  -3.983  -0.181   3.843  42.387 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     1.342e+01  1.069e+00
## population_change                              -3.108e-01  7.099e-02
## age65plus                                       1.292e-01  3.330e-02
## White                                           1.239e-01  9.031e-03
## Hispanic                                       -2.364e-01  9.046e-03
## Edu_batchelors                                 -7.792e-01  2.704e-02
## `Housing Units in Multi-Unit Structures`        1.402e-01  1.824e-02
## `Median Value of Owner-Occupied Housing Units` -9.084e-06  2.661e-06
## Income                                          1.465e-04  4.319e-05
## `Building Permits`                             -4.518e-04  1.035e-04
## Evangelical                                    -2.670e-01  1.306e-02
## Mormon                                         -5.276e-01  3.000e-02
## Christian                                       9.638e-02  9.980e-03
## IncomeXPopChange                                5.898e-03  3.088e-03
##                                                t value Pr(>|t|)    
## (Intercept)                                     12.553  < 2e-16 ***
## population_change                               -4.378 1.24e-05 ***
## age65plus                                        3.880 0.000107 ***
## White                                           13.718  < 2e-16 ***
## Hispanic                                       -26.133  < 2e-16 ***
## Edu_batchelors                                 -28.811  < 2e-16 ***
## `Housing Units in Multi-Unit Structures`         7.686 2.03e-14 ***
## `Median Value of Owner-Occupied Housing Units`  -3.414 0.000648 ***
## Income                                           3.393 0.000700 ***
## `Building Permits`                              -4.366 1.31e-05 ***
## Evangelical                                    -20.443  < 2e-16 ***
## Mormon                                         -17.584  < 2e-16 ***
## Christian                                        9.657  < 2e-16 ***
## IncomeXPopChange                                 1.910 0.056197 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.354 on 3095 degrees of freedom
## Multiple R-squared:  0.6169, Adjusted R-squared:  0.6153 
## F-statistic: 383.4 on 13 and 3095 DF,  p-value: < 2.2e-16

Separate Red, Blue, and Swing States

Swing-state categorization was based on whether the state saw a competitive ground game.

While Utah looked as though it might be close for a while because of third-party candidate Evan McMullin, it is still classified as a red state: Clinton never made a play for the state, and it appeared Trump would almost certainly win in the end.

Red = 22, Blue = 14 + DC, Swing = 14
# Rows of `votes` belonging to the 22 states treated as red in this analysis.
# Fix: Alaska was previously written "Ak", which does not match the two-letter
# upper-case codes used for every other state here, so it was silently left
# out of the red group.
# %in% never returns NA, so rows with a missing state_abbr are excluded, just
# as which() excluded them before.
RedStates <- votes[votes$state_abbr %in% c(
  "AL", "AK", "AR", "GA", "ID", "IN", "KS", "KY", "LA", "MS", "MO",
  "MT", "NE", "ND", "OK", "SC", "SD", "TN", "TX", "UT", "WV", "WY"
), ]

# Rows of `votes` belonging to the 14 states (plus DC) treated as blue in this
# analysis. %in% never returns NA, so rows with a missing state_abbr are
# excluded.
BlueStates <- votes[votes$state_abbr %in% c(
  "CA", "CT", "DE", "HI", "IL", "MA", "MD", "NJ",
  "NM", "NY", "OR", "RI", "VT", "WA", "DC"
), ]

# Rows of `votes` belonging to the 14 states treated as swing states in this
# analysis. %in% never returns NA, so rows with a missing state_abbr are
# excluded.
SwingStates <- votes[votes$state_abbr %in% c(
  "AZ", "CO", "FL", "IA", "ME", "MI", "MN",
  "NV", "NH", "NC", "OH", "PA", "VA", "WI"
), ]

Analyze Correlation Differences Between States

# Split each state group into two 11-column subsets (selected by position) so
# that each correlation matrix stays small enough to read. Columns 8 and 9 are
# repeated in both subsets.

# First variable group.
Red_subset1 <- RedStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]
Blue_subset1 <- BlueStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]
Swing_subset1 <- SwingStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]

# Second variable group.
Red_subset2 <- RedStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]
Blue_subset2 <- BlueStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]
Swing_subset2 <- SwingStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]

# Correlation matrix for every subset. use = "complete.obs" drops any row that
# has a missing value in that subset before the correlations are computed.
Red_cor1 <- cor(Red_subset1, use = "complete.obs")
Red_cor2 <- cor(Red_subset2, use = "complete.obs")

Blue_cor1 <- cor(Blue_subset1, use = "complete.obs")
Blue_cor2 <- cor(Blue_subset2, use = "complete.obs")

Swing_cor1 <- cor(Swing_subset1, use = "complete.obs")
Swing_cor2 <- cor(Swing_subset2, use = "complete.obs")

# Shaded correlation plot for the first red-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Red1 <- corrplot(Red_cor1,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Red States"
)

# Shaded correlation plot for the second red-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Red2 <- corrplot(Red_cor2,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Red States"
)

# Shaded correlation plot for the first blue-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Blue1 <- corrplot(Blue_cor1,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Blue States"
)

# Shaded correlation plot for the second blue-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Blue2 <- corrplot(Blue_cor2,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Blue States"
)

# Shaded correlation plot for the first swing-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Swing1 <- corrplot(Swing_cor1,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Swing States"
)

# Shaded correlation plot for the second swing-state variable group, with each
# coefficient printed in its cell.
# Fix: `addcolorlabel = "no"` is not a corrplot() argument; it was forwarded to
# the base graphics calls and only produced "is not a graphical parameter"
# warnings. cl.pos = "n" is the supported way to hide the colour legend.
Swing2 <- corrplot(Swing_cor2,
  method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n", title = "Correlation Swing States"
)

# Auto-print the value that the corrplot() call stored in Red1.
Red1
##                                              Clinton       Trump
## Clinton                                 1.0000000000 -0.97454405
## Trump                                  -0.9745440481  1.00000000
## Obama                                   0.9518777225 -0.92517639
## Romney                                 -0.9438171418  0.92263151
## population_change                      -0.0008282731 -0.04660033
## age65plus                              -0.3789883517  0.41369653
## White                                  -0.7378736304  0.67726577
## Black                                   0.7051320888 -0.62306859
## Hispanic                                0.1381746618 -0.13011204
## Edu_batchelors                          0.1444483308 -0.23197460
## Housing Units in Multi-Unit Structures  0.4410465109 -0.49868421
##                                              Obama      Romney
## Clinton                                 0.95187772 -0.94381714
## Trump                                  -0.92517639  0.92263151
## Obama                                   1.00000000 -0.99763351
## Romney                                 -0.99763351  1.00000000
## population_change                      -0.07143798  0.06926463
## age65plus                              -0.28698264  0.27673943
## White                                  -0.67174688  0.64601193
## Black                                   0.61854065 -0.59384754
## Hispanic                                0.03879629 -0.02652863
## Edu_batchelors                          0.01813029 -0.02790650
## Housing Units in Multi-Unit Structures  0.37285662 -0.38091848
##                                        population_change  age65plus
## Clinton                                    -0.0008282731 -0.3789884
## Trump                                      -0.0466003272  0.4136965
## Obama                                      -0.0714379788 -0.2869826
## Romney                                      0.0692646298  0.2767394
## population_change                           1.0000000000 -0.4263996
## age65plus                                  -0.4263996223  1.0000000
## White                                       0.0824259634  0.3211914
## Black                                      -0.1704546890 -0.2340054
## Hispanic                                    0.1502199469 -0.2361226
## Edu_batchelors                              0.4217309267 -0.2105067
## Housing Units in Multi-Unit Structures      0.3144498390 -0.4693652
##                                              White      Black    Hispanic
## Clinton                                -0.73787363  0.7051321  0.13817466
## Trump                                   0.67726577 -0.6230686 -0.13011204
## Obama                                  -0.67174688  0.6185407  0.03879629
## Romney                                  0.64601193 -0.5938475 -0.02652863
## population_change                       0.08242596 -0.1704547  0.15021995
## age65plus                               0.32119141 -0.2340054 -0.23612255
## White                                   1.00000000 -0.8865513  0.13658735
## Black                                  -0.88655127  1.0000000 -0.13612422
## Hispanic                                0.13658735 -0.1361242  1.00000000
## Edu_batchelors                          0.05997594 -0.1035870 -0.02227690
## Housing Units in Multi-Unit Structures -0.17539997  0.1517083  0.04081684
##                                        Edu_batchelors
## Clinton                                    0.14444833
## Trump                                     -0.23197460
## Obama                                      0.01813029
## Romney                                    -0.02790650
## population_change                          0.42173093
## age65plus                                 -0.21050672
## White                                      0.05997594
## Black                                     -0.10358702
## Hispanic                                  -0.02227690
## Edu_batchelors                             1.00000000
## Housing Units in Multi-Unit Structures     0.56789840
##                                        Housing Units in Multi-Unit Structures
## Clinton                                                            0.44104651
## Trump                                                             -0.49868421
## Obama                                                              0.37285662
## Romney                                                            -0.38091848
## population_change                                                  0.31444984
## age65plus                                                         -0.46936520
## White                                                             -0.17539997
## Black                                                              0.15170825
## Hispanic                                                           0.04081684
## Edu_batchelors                                                     0.56789840
## Housing Units in Multi-Unit Structures                             1.00000000
# Auto-print the value that the corrplot() call stored in Red2.
Red2
##                                                    Clinton        Trump
## Clinton                                       1.0000000000 -0.974515165
## Trump                                        -0.9745151649  1.000000000
## Median Value of Owner-Occupied Housing Units  0.0610092305 -0.155044672
## Income                                       -0.2124907760  0.158887425
## Building Permits                              0.1922322822 -0.213255026
## Density                                       0.3481460922 -0.361739767
## Evangelical                                   0.1128595894  0.004316957
## Protestant                                   -0.1618435569  0.148459789
## Catholic                                     -0.0002087637 -0.018311267
## Mormon                                       -0.0930669961 -0.026693305
## Christian                                     0.0779975374  0.008299153
##                                              Median Value of Owner-Occupied Housing Units
## Clinton                                                                        0.06100923
## Trump                                                                         -0.15504467
## Median Value of Owner-Occupied Housing Units                                   1.00000000
## Income                                                                         0.54173767
## Building Permits                                                               0.23435493
## Density                                                                        0.28197037
## Evangelical                                                                   -0.33300854
## Protestant                                                                    -0.16682105
## Catholic                                                                       0.01057547
## Mormon                                                                         0.19343391
## Christian                                                                     -0.36374188
##                                                   Income Building Permits
## Clinton                                      -0.21249078      0.192232282
## Trump                                         0.15888743     -0.213255026
## Median Value of Owner-Occupied Housing Units  0.54173767      0.234354925
## Income                                        1.00000000      0.222119730
## Building Permits                              0.22211973      1.000000000
## Density                                       0.21786151      0.540524116
## Evangelical                                  -0.30707663     -0.093813491
## Protestant                                    0.34503330     -0.056483009
## Catholic                                      0.31091343      0.027574507
## Mormon                                       -0.02879495      0.001536522
## Christian                                     0.05798660     -0.087482416
##                                                  Density  Evangelical
## Clinton                                       0.34814609  0.112859589
## Trump                                        -0.36173977  0.004316957
## Median Value of Owner-Occupied Housing Units  0.28197037 -0.333008541
## Income                                        0.21786151 -0.307076633
## Building Permits                              0.54052412 -0.093813491
## Density                                       1.00000000 -0.083974026
## Evangelical                                  -0.08397403  1.000000000
## Protestant                                   -0.11856447 -0.129093351
## Catholic                                      0.03755652 -0.333344857
## Mormon                                       -0.03292587 -0.231137536
## Christian                                    -0.09244637  0.634474140
##                                               Protestant      Catholic
## Clinton                                      -0.16184356 -0.0002087637
## Trump                                         0.14845979 -0.0183112670
## Median Value of Owner-Occupied Housing Units -0.16682105  0.0105754667
## Income                                        0.34503330  0.3109134344
## Building Permits                             -0.05648301  0.0275745067
## Density                                      -0.11856447  0.0375565232
## Evangelical                                  -0.12909335 -0.3333448574
## Protestant                                    1.00000000  0.2570057154
## Catholic                                      0.25700572  1.0000000000
## Mormon                                       -0.11371929 -0.0619180298
## Christian                                     0.54605106  0.3155763655
##                                                    Mormon    Christian
## Clinton                                      -0.093066996  0.077997537
## Trump                                        -0.026693305  0.008299153
## Median Value of Owner-Occupied Housing Units  0.193433915 -0.363741880
## Income                                       -0.028794946  0.057986604
## Building Permits                              0.001536522 -0.087482416
## Density                                      -0.032925873 -0.092446368
## Evangelical                                  -0.231137536  0.634474140
## Protestant                                   -0.113719289  0.546051061
## Catholic                                     -0.061918030  0.315576366
## Mormon                                        1.000000000 -0.285805773
## Christian                                    -0.285805773  1.000000000
# Auto-print the value that the corrplot() call stored in Blue1.
Blue1
##                                           Clinton      Trump      Obama
## Clinton                                 1.0000000 -0.9918399  0.9481736
## Trump                                  -0.9918399  1.0000000 -0.9421126
## Obama                                   0.9481736 -0.9421126  1.0000000
## Romney                                 -0.9468479  0.9467359 -0.9980754
## population_change                       0.5269019 -0.5156305  0.3801360
## age65plus                              -0.3944511  0.3726769 -0.3131466
## White                                  -0.6003950  0.5788362 -0.5549216
## Black                                   0.4210651 -0.3574980  0.4049580
## Hispanic                                0.2843771 -0.3213727  0.1910168
## Edu_batchelors                          0.6805001 -0.6651660  0.5388131
## Housing Units in Multi-Unit Structures  0.6713731 -0.6272824  0.6161323
##                                            Romney population_change
## Clinton                                -0.9468479         0.5269019
## Trump                                   0.9467359        -0.5156305
## Obama                                  -0.9980754         0.3801360
## Romney                                  1.0000000        -0.3737910
## population_change                      -0.3737910         1.0000000
## age65plus                               0.2991700        -0.5423292
## White                                   0.5471660        -0.4371531
## Black                                  -0.3820461         0.2140206
## Hispanic                               -0.2064690         0.2165409
## Edu_batchelors                         -0.5339278         0.4866104
## Housing Units in Multi-Unit Structures -0.5993005         0.4412370
##                                         age65plus      White        Black
## Clinton                                -0.3944511 -0.6003950  0.421065076
## Trump                                   0.3726769  0.5788362 -0.357497951
## Obama                                  -0.3131466 -0.5549216  0.404958047
## Romney                                  0.2991700  0.5471660 -0.382046062
## population_change                      -0.5423292 -0.4371531  0.214020639
## age65plus                               1.0000000  0.4273832 -0.354017194
## White                                   0.4273832  1.0000000 -0.683284217
## Black                                  -0.3540172 -0.6832842  1.000000000
## Hispanic                               -0.2954593 -0.1764290 -0.004243443
## Edu_batchelors                         -0.2737985 -0.3099415  0.207935685
## Housing Units in Multi-Unit Structures -0.4738699 -0.5381249  0.475199861
##                                            Hispanic Edu_batchelors
## Clinton                                 0.284377061     0.68050014
## Trump                                  -0.321372712    -0.66516605
## Obama                                   0.191016758     0.53881314
## Romney                                 -0.206469029    -0.53392783
## population_change                       0.216540879     0.48661041
## age65plus                              -0.295459296    -0.27379855
## White                                  -0.176428961    -0.30994146
## Black                                  -0.004243443     0.20793569
## Hispanic                                1.000000000    -0.08052484
## Edu_batchelors                         -0.080524835     1.00000000
## Housing Units in Multi-Unit Structures  0.098611945     0.56518678
##                                        Housing Units in Multi-Unit Structures
## Clinton                                                            0.67137314
## Trump                                                             -0.62728237
## Obama                                                              0.61613233
## Romney                                                            -0.59930050
## population_change                                                  0.44123700
## age65plus                                                         -0.47386994
## White                                                             -0.53812491
## Black                                                              0.47519986
## Hispanic                                                           0.09861195
## Edu_batchelors                                                     0.56518678
## Housing Units in Multi-Unit Structures                             1.00000000
# Auto-print the value that the corrplot() call stored in Blue2.
Blue2
##                                                 Clinton       Trump
## Clinton                                       1.0000000 -0.99183992
## Trump                                        -0.9918399  1.00000000
## Median Value of Owner-Occupied Housing Units  0.6740587 -0.64892088
## Income                                        0.4979443 -0.45928947
## Building Permits                              0.4425732 -0.41828456
## Density                                       0.3690518 -0.33463697
## Evangelical                                  -0.5309847  0.54567940
## Protestant                                   -0.3586681  0.37904502
## Catholic                                      0.2162151 -0.20294355
## Mormon                                       -0.1143945  0.07901817
## Christian                                    -0.3495691  0.38300046
##                                              Median Value of Owner-Occupied Housing Units
## Clinton                                                                        0.67405870
## Trump                                                                         -0.64892088
## Median Value of Owner-Occupied Housing Units                                   1.00000000
## Income                                                                         0.75292542
## Building Permits                                                               0.43977266
## Density                                                                        0.40532727
## Evangelical                                                                   -0.44951518
## Protestant                                                                    -0.34503358
## Catholic                                                                       0.12911329
## Mormon                                                                        -0.04495867
## Christian                                                                     -0.36698841
##                                                   Income Building Permits
## Clinton                                       0.49794428       0.44257316
## Trump                                        -0.45928947      -0.41828456
## Median Value of Owner-Occupied Housing Units  0.75292542       0.43977266
## Income                                        1.00000000       0.32052556
## Building Permits                              0.32052556       1.00000000
## Density                                       0.28644014       0.35294770
## Evangelical                                  -0.35655004      -0.17451973
## Protestant                                   -0.01468794      -0.20886607
## Catholic                                      0.30976489       0.02966670
## Mormon                                       -0.15407197      -0.06221267
## Christian                                    -0.01613330      -0.17749160
##                                                   Density  Evangelical
## Clinton                                       0.369051753 -0.530984650
## Trump                                        -0.334636975  0.545679402
## Median Value of Owner-Occupied Housing Units  0.405327268 -0.449515182
## Income                                        0.286440140 -0.356550045
## Building Permits                              0.352947702 -0.174519734
## Density                                       1.000000000 -0.115012485
## Evangelical                                  -0.115012485  1.000000000
## Protestant                                   -0.128793888  0.245035603
## Catholic                                      0.008292259 -0.321400482
## Mormon                                       -0.073912357 -0.002749382
## Christian                                    -0.112337707  0.575112737
##                                               Protestant     Catholic
## Clinton                                      -0.35866808  0.216215148
## Trump                                         0.37904502 -0.202943554
## Median Value of Owner-Occupied Housing Units -0.34503358  0.129113291
## Income                                       -0.01468794  0.309764891
## Building Permits                             -0.20886607  0.029666701
## Density                                      -0.12879389  0.008292259
## Evangelical                                   0.24503560 -0.321400482
## Protestant                                    1.00000000 -0.068016889
## Catholic                                     -0.06801689  1.000000000
## Mormon                                       -0.10760836 -0.181057304
## Christian                                     0.59981263  0.446901351
##                                                    Mormon  Christian
## Clinton                                      -0.114394545 -0.3495691
## Trump                                         0.079018170  0.3830005
## Median Value of Owner-Occupied Housing Units -0.044958671 -0.3669884
## Income                                       -0.154071973 -0.0161333
## Building Permits                             -0.062212666 -0.1774916
## Density                                      -0.073912357 -0.1123377
## Evangelical                                  -0.002749382  0.5751127
## Protestant                                   -0.107608358  0.5998126
## Catholic                                     -0.181057304  0.4469014
## Mormon                                        1.000000000 -0.1908860
## Christian                                    -0.190885997  1.0000000
# Auto-print the value that the corrplot() call stored in Swing1.
Swing1
##                                           Clinton      Trump       Obama
## Clinton                                 1.0000000 -0.9892751  0.90576185
## Trump                                  -0.9892751  1.0000000 -0.91029514
## Obama                                   0.9057618 -0.9102951  1.00000000
## Romney                                 -0.8991358  0.9092335 -0.99810214
## population_change                       0.3278722 -0.3409298  0.10787016
## age65plus                              -0.3190696  0.3443082 -0.21992734
## White                                  -0.5912501  0.5226715 -0.42897573
## Black                                   0.5060647 -0.4248166  0.35780779
## Hispanic                                0.2120297 -0.2246970  0.09593826
## Edu_batchelors                          0.5456011 -0.5878235  0.32076008
## Housing Units in Multi-Unit Structures  0.5976703 -0.6187282  0.47124876
##                                            Romney population_change
## Clinton                                -0.8991358        0.32787221
## Trump                                   0.9092335       -0.34092983
## Obama                                  -0.9981021        0.10787016
## Romney                                  1.0000000       -0.10604488
## population_change                      -0.1060449        1.00000000
## age65plus                               0.2233397       -0.37098491
## White                                   0.4100177       -0.13914053
## Black                                  -0.3354549        0.03650365
## Hispanic                               -0.1018363        0.27500009
## Edu_batchelors                         -0.3266181        0.60398889
## Housing Units in Multi-Unit Structures -0.4737638        0.44690743
##                                         age65plus       White       Black
## Clinton                                -0.3190696 -0.59125006  0.50606467
## Trump                                   0.3443082  0.52267152 -0.42481657
## Obama                                  -0.2199273 -0.42897573  0.35780779
## Romney                                  0.2233397  0.41001768 -0.33545490
## population_change                      -0.3709849 -0.13914053  0.03650365
## age65plus                               1.0000000  0.24101868 -0.17398099
## White                                   0.2410187  1.00000000 -0.90880670
## Black                                  -0.1739810 -0.90880670  1.00000000
## Hispanic                               -0.1773258 -0.11917949  0.04269540
## Edu_batchelors                         -0.3308759 -0.04486616 -0.03874371
## Housing Units in Multi-Unit Structures -0.4489890 -0.16534998  0.11930294
##                                           Hispanic Edu_batchelors
## Clinton                                 0.21202972     0.54560111
## Trump                                  -0.22469699    -0.58782349
## Obama                                   0.09593826     0.32076008
## Romney                                 -0.10183626    -0.32661812
## population_change                       0.27500009     0.60398889
## age65plus                              -0.17732582    -0.33087594
## White                                  -0.11917949    -0.04486616
## Black                                   0.04269540    -0.03874371
## Hispanic                                1.00000000     0.08983758
## Edu_batchelors                          0.08983758     1.00000000
## Housing Units in Multi-Unit Structures  0.19953222     0.61690438
##                                        Housing Units in Multi-Unit Structures
## Clinton                                                             0.5976703
## Trump                                                              -0.6187282
## Obama                                                               0.4712488
## Romney                                                             -0.4737638
## population_change                                                   0.4469074
## age65plus                                                          -0.4489890
## White                                                              -0.1653500
## Black                                                               0.1193029
## Hispanic                                                            0.1995322
## Edu_batchelors                                                      0.6169044
## Housing Units in Multi-Unit Structures                              1.0000000
# Auto-print the `Swing2` object (defined earlier in the file). The output
# below is a symmetric matrix with a unit diagonal -- presumably a correlation
# matrix of the Clinton/Trump columns against housing, income, density and
# religion columns; confirm against where `Swing2` is built.
Swing2
##                                                   Clinton        Trump
## Clinton                                       1.000000000 -0.989300933
## Trump                                        -0.989300933  1.000000000
## Median Value of Owner-Occupied Housing Units  0.402785367 -0.434275030
## Income                                        0.284544153 -0.331124057
## Building Permits                              0.299642444 -0.290135133
## Density                                       0.488268321 -0.478424811
## Evangelical                                  -0.026784760  0.091844644
## Protestant                                   -0.101535213  0.068936886
## Catholic                                      0.108388537 -0.147416241
## Mormon                                       -0.023343074  0.001023867
## Christian                                     0.009763309 -0.002181567
##                                              Median Value of Owner-Occupied Housing Units
## Clinton                                                                        0.40278537
## Trump                                                                         -0.43427503
## Median Value of Owner-Occupied Housing Units                                   1.00000000
## Income                                                                         0.79667459
## Building Permits                                                               0.21334092
## Density                                                                        0.36844217
## Evangelical                                                                   -0.13990840
## Protestant                                                                    -0.17653233
## Catholic                                                                       0.02430069
## Mormon                                                                         0.01655113
## Christian                                                                     -0.18403348
##                                                   Income Building Permits
## Clinton                                       0.28454415      0.299642444
## Trump                                        -0.33112406     -0.290135133
## Median Value of Owner-Occupied Housing Units  0.79667459      0.213340916
## Income                                        1.00000000      0.253087892
## Building Permits                              0.25308789      1.000000000
## Density                                       0.32256662      0.269280687
## Evangelical                                  -0.26953397     -0.135179848
## Protestant                                    0.08096759     -0.164962871
## Catholic                                      0.23383761      0.018358020
## Mormon                                       -0.05321687      0.001369272
## Christian                                    -0.01562642     -0.168045842
##                                                  Density Evangelical
## Clinton                                       0.48826832 -0.02678476
## Trump                                        -0.47842481  0.09184464
## Median Value of Owner-Occupied Housing Units  0.36844217 -0.13990840
## Income                                        0.32256662 -0.26953397
## Building Permits                              0.26928069 -0.13517985
## Density                                       1.00000000 -0.01801773
## Evangelical                                  -0.01801773  1.00000000
## Protestant                                   -0.06292108  0.03454216
## Catholic                                      0.09371452 -0.35160241
## Mormon                                       -0.04378564 -0.05221740
## Christian                                     0.02012994  0.51367896
##                                               Protestant    Catholic
## Clinton                                      -0.10153521  0.10838854
## Trump                                         0.06893689 -0.14741624
## Median Value of Owner-Occupied Housing Units -0.17653233  0.02430069
## Income                                        0.08096759  0.23383761
## Building Permits                             -0.16496287  0.01835802
## Density                                      -0.06292108  0.09371452
## Evangelical                                   0.03454216 -0.35160241
## Protestant                                    1.00000000  0.26213572
## Catholic                                      0.26213572  1.00000000
## Mormon                                       -0.12657358 -0.02562251
## Christian                                     0.74227862  0.42261901
##                                                    Mormon    Christian
## Clinton                                      -0.023343074  0.009763309
## Trump                                         0.001023867 -0.002181567
## Median Value of Owner-Occupied Housing Units  0.016551130 -0.184033481
## Income                                       -0.053216873 -0.015626422
## Building Permits                              0.001369272 -0.168045842
## Density                                      -0.043785640  0.020129944
## Evangelical                                  -0.052217401  0.513678964
## Protestant                                   -0.126573577  0.742278624
## Catholic                                     -0.025622510  0.422619012
## Mormon                                        1.000000000 -0.124979431
## Christian                                    -0.124979431  1.000000000

Most Important Features

# Feature tables for the random-forest variable-importance models below.
#
# Columns are selected by position, so these vectors must be kept in sync with
# the column layout of RedStates / BlueStates / SwingStates. The index sets are
# defined once here so the nine tables cannot drift apart.

# The 50 candidate predictor columns shared by every table.
predictor_cols <- c(19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:85, 87, 89, 100)

# Response first (column 8 / column 9 -- presumably the Clinton / Trump vote
# columns; confirm against the data), followed by the predictors. The model
# code relies on the response being column 1 of these tables.
clinton_cols <- c(8, predictor_cols)
trump_cols <- c(9, predictor_cols)

# For the "Shift" tables column 98 is inserted just before the last predictor
# (100), i.e. at position 50 of 51. The Swing model code reads the response
# from position 50, so this ordering must be preserved.
shift_cols <- append(predictor_cols, 98, after = length(predictor_cols) - 1)

Red_Features_clinton <- RedStates[, clinton_cols]
Red_Features_Trump <- RedStates[, trump_cols]
Red_Features_Shift <- RedStates[, shift_cols]

Blue_Features_clinton <- BlueStates[, clinton_cols]
Blue_Features_Trump <- BlueStates[, trump_cols]
Blue_Features_Shift <- BlueStates[, shift_cols]

Swing_Features_clinton <- SwingStates[, clinton_cols]
Swing_Features_Trump <- SwingStates[, trump_cols]
Swing_Features_Shift <- SwingStates[, shift_cols]

Red States

Clinton

library(h2o)

# Start (or connect to) a local H2O cluster: nthreads = -1 requests all
# available cores, and the JVM heap is capped at 6 GB.
h2o.init(nthreads = -1, max_mem_size = "6G")
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         1 hours 11 minutes 
##     H2O cluster version:        3.10.4.6 
##     H2O cluster version age:    2 months and 7 days  
##     H2O cluster name:           H2O_started_from_R_onest_xjv369 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   4.93 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  4 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 3.3.2 (2016-10-31)
# Upload the red-state Clinton feature table to the H2O cluster.
red.Clinton <- as.h2o(Red_Features_clinton)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Random forest for the red-state Clinton response.
#
# The response is the first column of `red.Clinton`; every remaining column is
# a predictor. Using `[-1]` instead of a hard-coded `2:51` keeps the predictor
# list correct if the feature table gains or loses columns.
vars.Clinton <- colnames(red.Clinton)
y_var.Clinton <- vars.Clinton[1]
x_vars.Clinton <- vars.Clinton[-1]

# Up to 200 trees with 10-fold cross-validation and a fixed seed.
# NOTE: stopping_rounds = 2 together with score_each_iteration = TRUE enables
# early stopping evaluated after every tree, so training can halt far short of
# `ntrees` (the run recorded below reports 16 trees).
Clinton_features <- h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = red.Clinton,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================================| 100%
# Print the fitted model's summary (model details, training and
# cross-validation metrics, scoring history, variable importances).
summary(Clinton_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_16 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              16                       16              223335        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1076       1145  1105.62500
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  7.696872
## RMSE:  2.774324
## MAE:  2.055538
## RMSLE:  0.1196493
## Mean Residual Deviance :  7.696872
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  6.281383
## RMSE:  2.506269
## MAE:  1.83702
## RMSLE:  0.1095244
## Mean Residual Deviance :  6.281383
## 
## 
## Cross-Validation Metrics Summary: 
##                          mean           sd cv_1_valid cv_2_valid
## mae                 1.8355962    0.1141399  1.6837574  2.1626139
## mse                 6.2755084   0.97351587    4.92367   8.635242
## r2                  0.9686772 0.0047762212  0.9764076  0.9572694
## residual_deviance   6.2755084   0.97351587    4.92367   8.635242
## rmse                 2.490159   0.19315411  2.2189345  2.9385784
## rmsle             0.109176315 0.0054995855 0.09629298 0.12078045
##                    cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae                 1.8410962  1.6558785  1.9910244  1.7926586  1.9807729
## mse                   6.09917  4.9993973   7.562722  6.7656913   8.041615
## r2                 0.97353154 0.96766365 0.96832085 0.96336627   0.964731
## residual_deviance     6.09917  4.9993973   7.562722  6.7656913   8.041615
## rmse                2.4696498  2.2359333  2.7500403  2.6010942  2.8357742
## rmsle             0.118361875 0.10802966 0.11506413 0.10264545 0.11514403
##                   cv_8_valid  cv_9_valid cv_10_valid
## mae                1.8185035   1.8189781   1.6106786
## mse                 6.213245   5.1234756    4.390857
## r2                 0.9634585   0.9703982  0.98162526
## residual_deviance   6.213245   5.1234756    4.390857
## rmse                2.492638   2.2635095   2.0954373
## rmsle             0.09909064 0.109159976 0.107193954
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:03  4.864 sec               0              
## 2  2017-07-04 13:51:03  4.903 sec               1       4.31247
## 3  2017-07-04 13:51:03  4.942 sec               2       4.13126
## 4  2017-07-04 13:51:03  4.979 sec               3       3.87425
## 5  2017-07-04 13:51:03  5.017 sec               4       3.77081
## 6  2017-07-04 13:51:03  5.055 sec               5       3.57081
## 7  2017-07-04 13:51:03  5.093 sec               6       3.45167
## 8  2017-07-04 13:51:03  5.139 sec               7       3.27751
## 9  2017-07-04 13:51:03  5.178 sec               8       3.23221
## 10 2017-07-04 13:51:03  5.221 sec               9       3.22226
## 11 2017-07-04 13:51:03  5.259 sec              10       3.11909
## 12 2017-07-04 13:51:03  5.298 sec              11       3.08332
## 13 2017-07-04 13:51:03  5.340 sec              12       2.98879
## 14 2017-07-04 13:51:03  5.379 sec              13       2.93352
## 15 2017-07-04 13:51:03  5.420 sec              14       2.88765
## 16 2017-07-04 13:51:03  5.459 sec              15       2.82578
## 17 2017-07-04 13:51:03  5.499 sec              16       2.77432
##    training_mae training_deviance
## 1                                
## 2       3.06495          18.59741
## 3       2.93380          17.06727
## 4       2.78295          15.00984
## 5       2.75611          14.21899
## 6       2.62603          12.75065
## 7       2.54368          11.91402
## 8       2.43841          10.74207
## 9       2.37809          10.44719
## 10      2.35941          10.38293
## 11      2.29128           9.72875
## 12      2.26552           9.50687
## 13      2.20734           8.93289
## 14      2.15999           8.60552
## 15      2.11868           8.33852
## 16      2.08613           7.98506
## 17      2.05554           7.69687
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##            variable relative_importance scaled_importance percentage
## 1             Obama      2573658.750000          1.000000   0.554100
## 2            Romney       789922.812500          0.306926   0.170068
## 3             White       386090.875000          0.150016   0.083124
## 4             Black       239763.156250          0.093160   0.051620
## 5 Black-Owned Firms       199841.468750          0.077649   0.043025
## 
## ---
##                                 variable relative_importance
## 45                        population2010         2981.910645
## 46 % Change - Private Nonfarm Employment         2620.126709
## 47                                 Women         2432.234863
## 48                      Building Permits         2127.586670
## 49                                Mormon         1653.549561
## 50        Manufacturers Shipments - 2007         1473.427246
##    scaled_importance percentage
## 45          0.001159   0.000642
## 46          0.001018   0.000564
## 47          0.000945   0.000524
## 48          0.000827   0.000458
## 49          0.000642   0.000356
## 50          0.000573   0.000317
# Show the first 20 rows of the variable-importance table.
h2o.varimp(Clinton_features)[seq_len(20), ]
## Variable Importances: 
##                                  variable relative_importance
## 1                                   Obama      2573658.750000
## 2                                  Romney       789922.812500
## 3                                   White       386090.875000
## 4                                   Black       239763.156250
## 5                       Black-Owned Firms       199841.468750
## 6                      Homeownership Rate        59751.312500
## 7                           % Female 2014        59501.601562
## 8                          Edu_batchelors        23063.269531
## 9                              Households        22870.996094
## 10                                 Income        19576.031250
## 11                           Foreign Born        18316.865234
## 12                                Density        16544.300781
## 13      Nonemployer Establishments - 2013        14730.364258
## 14                               Hispanic        14423.869141
## 15 Housing Units in Multi-Unit Structures        13085.135742
## 16                               Veterans        12188.005859
## 17                             NonEnglish        11087.466797
## 18                              age65plus        10934.945312
## 19                Median Household Income        10572.824219
## 20                     Housing Units 2014         9827.556641
##    scaled_importance percentage
## 1           1.000000   0.554100
## 2           0.306926   0.170068
## 3           0.150016   0.083124
## 4           0.093160   0.051620
## 5           0.077649   0.043025
## 6           0.023216   0.012864
## 7           0.023119   0.012810
## 8           0.008961   0.004965
## 9           0.008887   0.004924
## 10          0.007606   0.004215
## 11          0.007117   0.003944
## 12          0.006428   0.003562
## 13          0.005724   0.003171
## 14          0.005604   0.003105
## 15          0.005084   0.002817
## 16          0.004736   0.002624
## 17          0.004308   0.002387
## 18          0.004249   0.002354
## 19          0.004108   0.002276
## 20          0.003819   0.002116
# Plot the variable importances of the Clinton model, limited to 20 features.
h2o.varimp_plot(Clinton_features, num_of_features = 20)

Trump

# Upload the red-state Trump feature table to the H2O cluster.
red.Trump <- as.h2o(Red_Features_Trump)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Random forest for the red-state Trump response.
#
# The response is the first column of `red.Trump`; every remaining column is a
# predictor. Using `[-1]` instead of a hard-coded `2:51` keeps the predictor
# list correct if the feature table gains or loses columns.
vars.Trump <- colnames(red.Trump)
y_var.Trump <- vars.Trump[1]
x_vars.Trump <- vars.Trump[-1]

# Up to 200 trees with 10-fold cross-validation and a fixed seed.
# NOTE: stopping_rounds = 2 together with score_each_iteration = TRUE enables
# early stopping evaluated after every tree, so training can halt far short of
# `ntrees` (the run recorded below reports 13 trees).
Trump_features <- h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = red.Trump,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |=================================================================| 100%
# Print the fitted model's summary (model details, training and
# cross-validation metrics, scoring history, variable importances).
summary(Trump_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_17 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              13                       13              178646        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1066       1111  1088.15380
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  12.46205
## RMSE:  3.530163
## MAE:  2.493103
## RMSLE:  0.06657728
## Mean Residual Deviance :  12.46205
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  9.481995
## RMSE:  3.079285
## MAE:  2.163804
## RMSLE:  0.06018314
## Mean Residual Deviance :  9.481995
## 
## 
## Cross-Validation Metrics Summary: 
##                         mean           sd cv_1_valid cv_2_valid cv_3_valid
## mae                2.1636562   0.10761768  2.1019683  2.1061199   2.432384
## mse                  9.51925    1.5137854   8.617245   7.755696  10.458083
## r2                 0.9525106 0.0053042234 0.95542735  0.9615022  0.9545914
## residual_deviance    9.51925    1.5137854   8.617245   7.755696  10.458083
## rmse               3.0678484   0.23190086  2.9355145   2.784905   3.233896
## rmsle             0.05940332 0.0074851457 0.05184572 0.05284996 0.06195238
##                    cv_4_valid cv_5_valid  cv_6_valid cv_7_valid cv_8_valid
## mae                 2.0244572  2.3964448   2.0110521  2.3069484   1.981716
## mse                  8.873459   14.25896    8.232698  12.732398  7.9788194
## r2                 0.94354117 0.94064057  0.95439506  0.9433518   0.954677
## residual_deviance    8.873459   14.25896    8.232698  12.732398  7.9788194
## rmse                 2.978835  3.7761037    2.869268  3.5682485  2.8246803
## rmsle             0.054029405 0.07046554 0.048746984 0.07363369  0.0791951
##                    cv_9_valid cv_10_valid
## mae                 2.1530216   2.1224499
## mse                  8.164184     8.12096
## r2                  0.9520078   0.9649717
## residual_deviance    8.164184     8.12096
## rmse                2.8573036   2.8497298
## rmsle             0.049664658 0.051649746
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:10  4.335 sec               0              
## 2  2017-07-04 13:51:10  4.376 sec               1       5.40441
## 3  2017-07-04 13:51:10  4.416 sec               2       4.86521
## 4  2017-07-04 13:51:10  4.456 sec               3       4.77314
## 5  2017-07-04 13:51:10  4.497 sec               4       4.43548
## 6  2017-07-04 13:51:10  4.545 sec               5       4.38092
## 7  2017-07-04 13:51:10  4.584 sec               6       4.17351
## 8  2017-07-04 13:51:10  4.627 sec               7       4.01017
## 9  2017-07-04 13:51:10  4.666 sec               8       3.90818
## 10 2017-07-04 13:51:10  4.707 sec               9       3.80337
## 11 2017-07-04 13:51:10  4.745 sec              10       3.76355
## 12 2017-07-04 13:51:10  4.785 sec              11       3.67885
## 13 2017-07-04 13:51:10  4.826 sec              12       3.60753
## 14 2017-07-04 13:51:10  4.866 sec              13       3.53016
##    training_mae training_deviance
## 1                                
## 2       3.71802          29.20760
## 3       3.43668          23.67027
## 4       3.36211          22.78284
## 5       3.14665          19.67345
## 6       3.03613          19.19247
## 7       2.90393          17.41821
## 8       2.80249          16.08148
## 9       2.74589          15.27384
## 10      2.69627          14.46563
## 11      2.66658          14.16431
## 12      2.59878          13.53392
## 13      2.54445          13.01425
## 14      2.49310          12.46205
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##            variable relative_importance scaled_importance percentage
## 1             Obama      1888954.000000          1.000000   0.508296
## 2            Romney       832012.375000          0.440462   0.223885
## 3             White       190387.718750          0.100790   0.051231
## 4 Black-Owned Firms       162311.562500          0.085927   0.043676
## 5             Black       158985.765625          0.084166   0.042781
## 
## ---
##                                 variable relative_importance
## 45                              Catholic         2947.209473
## 46          Retail Sales / Capita - 2007         2945.502686
## 47 % Change - Private Nonfarm Employment         2780.957764
## 48                                 Women         2684.436523
## 49                            Households         1600.129517
## 50        Manufacturers Shipments - 2007          794.295288
##    scaled_importance percentage
## 45          0.001560   0.000793
## 46          0.001559   0.000793
## 47          0.001472   0.000748
## 48          0.001421   0.000722
## 49          0.000847   0.000431
## 50          0.000420   0.000214
# Show the first 20 rows of the variable-importance table.
h2o.varimp(Trump_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama      1888954.000000
## 2                                        Romney       832012.375000
## 3                                         White       190387.718750
## 4                             Black-Owned Firms       162311.562500
## 5                                         Black       158985.765625
## 6        Housing Units in Multi-Unit Structures        92385.445312
## 7                                Edu_batchelors        27543.599609
## 8  Median Value of Owner-Occupied Housing Units        23571.136719
## 9                                population2010        23484.205078
## 10            Nonemployer Establishments - 2013        18110.349609
## 11                                       Mormon        16443.634766
## 12  Accommodation and Food Service Sales - 2007        15685.311523
## 13                                 Foreign Born        14747.731445
## 14                                      Density        13801.258789
## 15                                     Hispanic        13292.035156
## 16                                  Evangelical        13254.381836
## 17                                    age65plus        12997.441406
## 18                   Private Nonfarm Employment        12415.383789
## 19                                      Poverty        11684.790039
## 20                             Persons Under 18        11233.022461
##    scaled_importance percentage
## 1           1.000000   0.508296
## 2           0.440462   0.223885
## 3           0.100790   0.051231
## 4           0.085927   0.043676
## 5           0.084166   0.042781
## 6           0.048908   0.024860
## 7           0.014581   0.007412
## 8           0.012478   0.006343
## 9           0.012432   0.006319
## 10          0.009588   0.004873
## 11          0.008705   0.004425
## 12          0.008304   0.004221
## 13          0.007807   0.003968
## 14          0.007306   0.003714
## 15          0.007037   0.003577
## 16          0.007017   0.003567
## 17          0.006881   0.003497
## 18          0.006573   0.003341
## 19          0.006186   0.003144
## 20          0.005947   0.003023
# Plot the variable importances of the Trump model, limited to 20 features.
h2o.varimp_plot(Trump_features, num_of_features = 20)

Swing

# Upload the red-state shift feature table to the H2O cluster.
red.Swing <- as.h2o(Red_Features_Shift)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Random forest for the red-state shift response.
#
# In the "Shift" feature tables the response sits at position 50 (it is
# selected just before the last predictor); every other column is a predictor.
# Dropping position 50 instead of listing `c(1:49, 51)` keeps the predictor
# list correct if columns are appended to the feature table.
vars.Swing <- colnames(red.Swing)
y_var.Swing <- vars.Swing[50]
x_vars.Swing <- vars.Swing[-50]

# Up to 200 trees with 10-fold cross-validation and a fixed seed.
# NOTE: stopping_rounds = 2 together with score_each_iteration = TRUE enables
# early stopping evaluated after every tree, so training can halt far short of
# `ntrees` (the run recorded below reports 19 trees).
# NOTE(review): RMSLE is reported as NaN for this model -- presumably because
# the response takes negative values; confirm.
Swing_features <- h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = red.Swing,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================================| 100%
# Print the fitted model's summary (model details, training and
# cross-validation metrics, scoring history, variable importances).
summary(Swing_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_18 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              19                       19              265331        20
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   20.00000       1070       1149  1105.68420
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  23.24074
## RMSE:  4.820866
## MAE:  3.596102
## RMSLE:  NaN
## Mean Residual Deviance :  23.24074
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  19.71987
## RMSE:  4.440706
## MAE:  3.331273
## RMSLE:  NaN
## Mean Residual Deviance :  19.71987
## 
## 
## Cross-Validation Metrics Summary: 
##                        mean          sd cv_1_valid cv_2_valid cv_3_valid
## mae               3.3331819  0.15898156  3.0794082  3.5649335  3.2170181
## mse               19.765106    2.194617  17.525085  22.534449  18.386826
## r2                0.7810878 0.018874457 0.78234655  0.7453276 0.79909736
## residual_deviance 19.765106    2.194617  17.525085  22.534449  18.386826
## rmse              4.4322605  0.24512383  4.1862974  4.7470465  4.2879863
## rmsle                   0.0         NaN        NaN        NaN        NaN
##                   cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                3.3267343  3.3794236  3.4389255  3.7522597   3.105099
## mse                 19.57111  20.494764  20.671776  26.309776  16.544827
## r2                0.77163637  0.8168954 0.76845205 0.75229067 0.83297205
## residual_deviance   19.57111  20.494764  20.671776  26.309776  16.544827
## rmse               4.4239244  4.5271144  4.5466223   5.129306   4.067533
## rmsle                    NaN        NaN        NaN        NaN        NaN
##                   cv_9_valid cv_10_valid
## mae                 3.471935   2.9960814
## mse                20.926386   14.686062
## r2                 0.7594977   0.7823626
## residual_deviance  20.926386   14.686062
## rmse                4.574537   3.8322399
## rmsle                    NaN         NaN
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:18  6.313 sec               0              
## 2  2017-07-04 13:51:18  6.351 sec               1       6.97728
## 3  2017-07-04 13:51:18  6.389 sec               2       7.09694
## 4  2017-07-04 13:51:18  6.426 sec               3       6.86900
## 5  2017-07-04 13:51:18  6.463 sec               4       6.45459
## 6  2017-07-04 13:51:18  6.503 sec               5       6.15812
## 7  2017-07-04 13:51:18  6.539 sec               6       5.86918
## 8  2017-07-04 13:51:18  6.577 sec               7       5.76196
## 9  2017-07-04 13:51:18  6.618 sec               8       5.57149
## 10 2017-07-04 13:51:19  6.656 sec               9       5.48650
## 11 2017-07-04 13:51:19  6.694 sec              10       5.33859
## 12 2017-07-04 13:51:19  6.732 sec              11       5.27095
## 13 2017-07-04 13:51:19  6.769 sec              12       5.21179
## 14 2017-07-04 13:51:19  6.808 sec              13       5.14405
## 15 2017-07-04 13:51:19  6.845 sec              14       5.07810
## 16 2017-07-04 13:51:19  6.886 sec              15       5.00606
## 17 2017-07-04 13:51:19  6.926 sec              16       4.95956
## 18 2017-07-04 13:51:19  6.968 sec              17       4.88909
## 19 2017-07-04 13:51:19  7.007 sec              18       4.86162
## 20 2017-07-04 13:51:19  7.048 sec              19       4.82087
##    training_mae training_deviance
## 1                                
## 2       5.11179          48.68248
## 3       5.10788          50.36651
## 4       5.06503          47.18318
## 5       4.80430          41.66175
## 6       4.58940          37.92244
## 7       4.39774          34.44727
## 8       4.29509          33.20024
## 9       4.16718          31.04152
## 10      4.10427          30.10166
## 11      3.98283          28.50056
## 12      3.94564          27.78290
## 13      3.88276          27.16279
## 14      3.81960          26.46128
## 15      3.75647          25.78709
## 16      3.70947          25.06059
## 17      3.67816          24.59728
## 18      3.62575          23.90319
## 19      3.61080          23.63530
## 20      3.59610          23.24074
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##         variable relative_importance scaled_importance percentage
## 1   Foreign Born       260140.906250          1.000000   0.120291
## 2          Obama       147572.171875          0.567278   0.068239
## 3 Edu_batchelors       144654.000000          0.556060   0.066889
## 4      age65plus       142850.734375          0.549128   0.066055
## 5         Romney       142136.234375          0.546382   0.065725
## 
## ---
##                                       variable relative_importance
## 45                              population2014         5878.897949
## 46                        Hispanic-Owned Firms         5656.727539
## 47 Accommodation and Food Service Sales - 2007         4519.589355
## 48            Merchant Wholesaler Sales - 2007         4270.420410
## 49                           Black-Owned Firms         3349.556885
## 50              Manufacturers Shipments - 2007         3252.891846
##    scaled_importance percentage
## 45          0.022599   0.002718
## 46          0.021745   0.002616
## 47          0.017374   0.002090
## 48          0.016416   0.001975
## 49          0.012876   0.001549
## 50          0.012504   0.001504
# First 20 rows of the variable-importance table for `Swing_features`.
h2o.varimp(Swing_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                  Foreign Born       260140.906250
## 2                                         Obama       147572.171875
## 3                                Edu_batchelors       144654.000000
## 4                                     age65plus       142850.734375
## 5                                        Romney       142136.234375
## 6                                      Hispanic       124024.648438
## 7  Median Value of Owner-Occupied Housing Units       119557.585938
## 8                                         Black       112236.117188
## 9                                     Christian        85163.335938
## 10                                        White        82062.125000
## 11            Nonemployer Establishments - 2013        76253.500000
## 12                             Building Permits        63448.925781
## 13                                  Evangelical        62427.707031
## 14                        Total Number of Firms        54842.359375
## 15                                       Mormon        51562.949219
## 16                                   NonEnglish        37711.167969
## 17                               Edu_highschool        33791.375000
## 18                             Persons Under 18        29549.197266
## 19                            Persons/Household        28367.781250
## 20                              Persons Under 5        22195.193359
##    scaled_importance percentage
## 1           1.000000   0.120291
## 2           0.567278   0.068239
## 3           0.556060   0.066889
## 4           0.549128   0.066055
## 5           0.546382   0.065725
## 6           0.476760   0.057350
## 7           0.459588   0.055284
## 8           0.431444   0.051899
## 9           0.327374   0.039380
## 10          0.315453   0.037946
## 11          0.293124   0.035260
## 12          0.243902   0.029339
## 13          0.239977   0.028867
## 14          0.210818   0.025360
## 15          0.198212   0.023843
## 16          0.144964   0.017438
## 17          0.129896   0.015625
## 18          0.113589   0.013664
## 19          0.109048   0.013118
## 20          0.085320   0.010263
# Plot the variable importances of `Swing_features`, limited to 20 features.
h2o.varimp_plot(Swing_features, num_of_features = 20)

Blue States

Clinton

# Convert `Blue_Features_clinton` into an H2O frame for modelling.
blue.Clinton <- as.h2o(Blue_Features_clinton)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 1 of the frame is the response; columns 2-51 are the predictors.
vars.Clinton <- colnames(blue.Clinton)
y_var.Clinton <- vars.Clinton[1]
x_vars.Clinton <- vars.Clinton[2:51]

# Random forest on `blue.Clinton`: at most 200 trees, 10-fold
# cross-validation, fixed seed. The model is scored after every tree and
# `stopping_rounds = 2` is passed as the stopping setting.
Clinton_features <- h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = blue.Clinton,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |=================================================================| 100%
# Print the model report: model summary, out-of-bag training metrics,
# cross-validation metrics, scoring history and variable importances.
summary(Clinton_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_19 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              11                       11               37623        13
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        18   15.09091        258        280   267.09090
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  17.86892
## RMSE:  4.227164
## MAE:  3.093524
## RMSLE:  0.1000788
## Mean Residual Deviance :  17.86892
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  12.65615
## RMSE:  3.557548
## MAE:  2.716101
## RMSLE:  0.08515592
## Mean Residual Deviance :  12.65615
## 
## 
## Cross-Validation Metrics Summary: 
##                          mean          sd cv_1_valid cv_2_valid cv_3_valid
## mae                 2.7365935  0.27456042  2.4079978  2.6331956  2.1940808
## mse                 12.756155   3.0731835  10.893368  14.223735   7.222359
## r2                  0.9489598 0.011713537 0.94676566  0.9416267  0.9582436
## residual_deviance   12.756155   3.0731835  10.893368  14.223735   7.222359
## rmse                3.5198364  0.42831442  3.3005102  3.7714367  2.6874447
## rmsle             0.084931456 0.006937669 0.07796885 0.08770437 0.06532815
##                   cv_4_valid  cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                3.2233045   2.3888707  2.8357618  2.9102492  2.2985606
## mse                15.218432    8.519886   11.42654  14.744233   7.315518
## r2                 0.9361236  0.96321857  0.9648951  0.9506188  0.9763101
## residual_deviance  15.218432    8.519886   11.42654  14.744233   7.315518
## rmse               3.9010808   2.9188843  3.3803167   3.839822  2.7047215
## rmsle              0.0899637 0.088544466 0.09011477 0.08825237 0.07405125
##                   cv_9_valid cv_10_valid
## mae                3.1906219    3.283291
## mse                16.226276   21.771202
## r2                 0.9349227   0.9168737
## residual_deviance  16.226276   21.771202
## rmse               4.0281854    4.665962
## rmsle             0.08436655  0.10302007
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:21  1.026 sec               0              
## 2  2017-07-04 13:51:21  1.041 sec               1       7.20274
## 3  2017-07-04 13:51:21  1.053 sec               2       6.16725
## 4  2017-07-04 13:51:21  1.065 sec               3       5.54210
## 5  2017-07-04 13:51:21  1.078 sec               4       5.07667
## 6  2017-07-04 13:51:21  1.090 sec               5       4.76479
## 7  2017-07-04 13:51:21  1.101 sec               6       4.49074
## 8  2017-07-04 13:51:21  1.113 sec               7       4.30113
## 9  2017-07-04 13:51:21  1.124 sec               8       4.56498
## 10 2017-07-04 13:51:21  1.135 sec               9       4.31874
## 11 2017-07-04 13:51:21  1.148 sec              10       4.57054
## 12 2017-07-04 13:51:21  1.160 sec              11       4.22716
##    training_mae training_deviance
## 1                                
## 2       5.38390          51.87941
## 3       4.68282          38.03495
## 4       4.26325          30.71489
## 5       3.93716          25.77258
## 6       3.67798          22.70322
## 7       3.44672          20.16675
## 8       3.26608          18.49973
## 9       3.35753          20.83906
## 10      3.20815          18.65151
## 11      3.23677          20.88984
## 12      3.09352          17.86892
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                 variable relative_importance
## 1                                  Obama       521138.156250
## 2                         Edu_batchelors       143678.546875
## 3                                 Romney        97947.765625
## 4 Housing Units in Multi-Unit Structures        42949.753906
## 5                     Housing Units 2014        39517.859375
##   scaled_importance percentage
## 1          1.000000   0.534021
## 2          0.275701   0.147230
## 3          0.187950   0.100369
## 4          0.082415   0.044011
## 5          0.075830   0.040495
## 
## ---
##                                 variable relative_importance
## 45                            Households          495.466919
## 46 % Change - Private Nonfarm Employment          488.615082
## 47                      Building Permits          439.591797
## 48         Living in Same House 1+ Years          420.951080
## 49               Median Household Income          386.190063
## 50                        population2010          260.440430
##    scaled_importance percentage
## 45          0.000951   0.000508
## 46          0.000938   0.000501
## 47          0.000844   0.000450
## 48          0.000808   0.000431
## 49          0.000741   0.000396
## 50          0.000500   0.000267
# First 20 rows of the variable-importance table for `Clinton_features`.
h2o.varimp(Clinton_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama       521138.156250
## 2                                Edu_batchelors       143678.546875
## 3                                        Romney        97947.765625
## 4        Housing Units in Multi-Unit Structures        42949.753906
## 5                            Housing Units 2014        39517.859375
## 6                                  Foreign Born        21875.576172
## 7                                    NonEnglish        16887.023438
## 8  Median Value of Owner-Occupied Housing Units        16468.039062
## 9                                       Density         9315.324219
## 10                            Black-Owned Firms         5275.490723
## 11            Nonemployer Establishments - 2013         4924.349609
## 12  Accommodation and Food Service Sales - 2007         4899.251465
## 13                                      Poverty         4471.787598
## 14                           Homeownership Rate         3373.303711
## 15                            Persons/Household         2891.104492
## 16                                  Evangelical         2808.970703
## 17                                   Protestant         2415.911621
## 18                                        White         2267.387939
## 19               Manufacturers Shipments - 2007         2228.818604
## 20                          Retail Sales - 2007         2164.006836
##    scaled_importance percentage
## 1           1.000000   0.534021
## 2           0.275701   0.147230
## 3           0.187950   0.100369
## 4           0.082415   0.044011
## 5           0.075830   0.040495
## 6           0.041977   0.022416
## 7           0.032404   0.017304
## 8           0.031600   0.016875
## 9           0.017875   0.009546
## 10          0.010123   0.005406
## 11          0.009449   0.005046
## 12          0.009401   0.005020
## 13          0.008581   0.004582
## 14          0.006473   0.003457
## 15          0.005548   0.002963
## 16          0.005390   0.002878
## 17          0.004636   0.002476
## 18          0.004351   0.002323
## 19          0.004277   0.002284
## 20          0.004152   0.002218
# Plot the variable importances of `Clinton_features`, limited to 20 features.
h2o.varimp_plot(Clinton_features, num_of_features = 20)

Trump

# Convert `Blue_Features_Trump` into an H2O frame for modelling.
blue.Trump <- as.h2o(Blue_Features_Trump)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 1 of the frame is the response; columns 2-51 are the predictors.
vars.Trump <- colnames(blue.Trump)
y_var.Trump <- vars.Trump[1]
x_vars.Trump <- vars.Trump[2:51]

# Random forest on `blue.Trump`: at most 200 trees, 10-fold
# cross-validation, fixed seed. The model is scored after every tree and
# `stopping_rounds = 2` is passed as the stopping setting.
Trump_features <- h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = blue.Trump,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================================| 100%
# Print the model report: model summary, out-of-bag training metrics,
# cross-validation metrics, scoring history and variable importances.
summary(Trump_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_20 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              15                       15               51240        13
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        16   14.93333        256        284   266.93332
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  16.72938
## RMSE:  4.090157
## MAE:  3.076985
## RMSLE:  0.1290486
## Mean Residual Deviance :  16.72938
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  14.90733
## RMSE:  3.861001
## MAE:  3.064429
## RMSLE:  0.1172027
## Mean Residual Deviance :  14.90733
## 
## 
## Cross-Validation Metrics Summary: 
##                         mean          sd  cv_1_valid cv_2_valid cv_3_valid
## mae                3.0837953  0.28637564   2.7118068   2.824019  3.3638227
## mse                 14.96513   2.1702669   11.829679   17.16653  16.693277
## r2                0.94103885 0.010828344   0.9446266 0.92939544 0.91053694
## residual_deviance   14.96513   2.1702669   11.829679   17.16653  16.693277
## rmse               3.8452232  0.29949012     3.43943  4.1432514  4.0857406
## rmsle             0.11109822  0.02232643 0.100389555 0.18709083 0.09374424
##                   cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                  3.49672   2.319074  3.5025194  2.6726663  3.0889263
## mse                 18.01212   8.360348   16.46241  11.838682  16.484095
## r2                0.92913544 0.96507454 0.95141697  0.9595432 0.94678384
## residual_deviance   18.01212   8.360348   16.46241  11.838682  16.484095
## rmse               4.2440686  2.8914266  4.0573897  3.4407387   4.060061
## rmsle             0.11449931 0.06198062 0.10806376 0.08483107 0.12588707
##                   cv_9_valid cv_10_valid
## mae                3.3352625   3.5231369
## mse                14.793237   18.010918
## r2                0.94081587   0.9330592
## residual_deviance  14.793237   18.010918
## rmse               3.8461976    4.243927
## rmsle             0.10381192   0.1306839
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:24  1.231 sec               0              
## 2  2017-07-04 13:51:24  1.243 sec               1       7.70278
## 3  2017-07-04 13:51:24  1.253 sec               2       6.50961
## 4  2017-07-04 13:51:24  1.265 sec               3       5.92579
## 5  2017-07-04 13:51:24  1.275 sec               4       5.78929
## 6  2017-07-04 13:51:24  1.286 sec               5       5.26220
## 7  2017-07-04 13:51:24  1.296 sec               6       5.15896
## 8  2017-07-04 13:51:24  1.308 sec               7       5.00656
## 9  2017-07-04 13:51:24  1.319 sec               8       5.11579
## 10 2017-07-04 13:51:24  1.331 sec               9       4.77839
## 11 2017-07-04 13:51:24  1.342 sec              10       4.56240
## 12 2017-07-04 13:51:24  1.354 sec              11       4.42896
## 13 2017-07-04 13:51:24  1.364 sec              12       4.36396
## 14 2017-07-04 13:51:24  1.376 sec              13       4.20601
## 15 2017-07-04 13:51:24  1.386 sec              14       4.22847
## 16 2017-07-04 13:51:24  1.397 sec              15       4.09016
##    training_mae training_deviance
## 1                                
## 2       5.66128          59.33281
## 3       4.93430          42.37508
## 4       4.60441          35.11494
## 5       4.45181          33.51584
## 6       4.13173          27.69074
## 7       4.03389          26.61488
## 8       3.82239          25.06560
## 9       3.76684          26.17127
## 10      3.61586          22.83302
## 11      3.48712          20.81553
## 12      3.38438          19.61569
## 13      3.35582          19.04417
## 14      3.19982          17.69053
## 15      3.19883          17.87997
## 16      3.07698          16.72938
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                 variable relative_importance
## 1                                  Obama       552152.312500
## 2                                 Romney       319621.687500
## 3                         Edu_batchelors       141846.843750
## 4 Housing Units in Multi-Unit Structures        79966.773438
## 5                           Foreign Born        67501.562500
##   scaled_importance percentage
## 1          1.000000   0.409437
## 2          0.578865   0.237009
## 3          0.256898   0.105184
## 4          0.144827   0.059298
## 5          0.122252   0.050054
## 
## ---
##                                 variable relative_importance
## 45 % Change - Private Nonfarm Employment         1026.374756
## 46                   Retail Sales - 2007         1000.179504
## 47   Private Nonfarm Establishments 2013          929.477112
## 48                 Total Number of Firms          916.109863
## 49                    Housing Units 2014          684.774475
## 50     Nonemployer Establishments - 2013          186.480164
##    scaled_importance percentage
## 45          0.001859   0.000761
## 46          0.001811   0.000742
## 47          0.001683   0.000689
## 48          0.001659   0.000679
## 49          0.001240   0.000508
## 50          0.000338   0.000138
# First 20 rows of the variable-importance table for `Trump_features`.
h2o.varimp(Trump_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama       552152.312500
## 2                                        Romney       319621.687500
## 3                                Edu_batchelors       141846.843750
## 4        Housing Units in Multi-Unit Structures        79966.773438
## 5                                  Foreign Born        67501.562500
## 6  Median Value of Owner-Occupied Housing Units        57713.132812
## 7                                    NonEnglish        29647.656250
## 8                                       Density         7088.221680
## 9                                    Protestant         4866.725098
## 10                                        Women         4536.835449
## 11                           Homeownership Rate         4282.443848
## 12                         Hispanic-Owned Firms         4106.511719
## 13                                     Hispanic         3964.405518
## 14                                      Poverty         3705.902588
## 15                            Persons/Household         3631.931641
## 16                                    Total_Pop         3313.454346
## 17  Accommodation and Food Service Sales - 2007         3250.160400
## 18                            Black-Owned Firms         3177.686279
## 19                            population_change         3162.727051
## 20                               population2010         3098.852783
##    scaled_importance percentage
## 1           1.000000   0.409437
## 2           0.578865   0.237009
## 3           0.256898   0.105184
## 4           0.144827   0.059298
## 5           0.122252   0.050054
## 6           0.104524   0.042796
## 7           0.053695   0.021985
## 8           0.012837   0.005256
## 9           0.008814   0.003609
## 10          0.008217   0.003364
## 11          0.007756   0.003176
## 12          0.007437   0.003045
## 13          0.007180   0.002940
## 14          0.006712   0.002748
## 15          0.006578   0.002693
## 16          0.006001   0.002457
## 17          0.005886   0.002410
## 18          0.005755   0.002356
## 19          0.005728   0.002345
## 20          0.005612   0.002298
# Plot the variable importances of `Trump_features`, limited to 20 features.
h2o.varimp_plot(Trump_features, num_of_features = 20)

Swing

# Convert `Blue_Features_Shift` into an H2O frame for modelling.
blue.Swing <- as.h2o(Blue_Features_Shift)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 50 of the frame is the response; columns 1-49 and 51 are the
# predictors.
vars.Swing <- colnames(blue.Swing)
y_var.Swing <- vars.Swing[50]
x_vars.Swing <- vars.Swing[c(1:49, 51)]

# Random forest on `blue.Swing`: at most 200 trees, 10-fold
# cross-validation, fixed seed. The model is scored after every tree and
# `stopping_rounds = 2` is passed as the stopping setting.
Swing_features <- h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = blue.Swing,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=================================================================| 100%
# Print the model report: model summary, out-of-bag training metrics,
# cross-validation metrics, scoring history and variable importances.
summary(Swing_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_21 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              10                       10               34595        15
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   16.60000        260        286   270.00000
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  33.22862
## RMSE:  5.764427
## MAE:  4.465677
## RMSLE:  NaN
## Mean Residual Deviance :  33.22862
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  30.54431
## RMSE:  5.526691
## MAE:  4.336278
## RMSLE:  NaN
## Mean Residual Deviance :  30.54431
## 
## 
## Cross-Validation Metrics Summary: 
##                        mean         sd cv_1_valid cv_2_valid cv_3_valid
## mae               4.3627768 0.32940245  3.6924987   4.488959   4.295938
## mse               31.024128  5.0281153  21.930616  31.969517  31.357307
## r2                0.7050931 0.07328907   0.761572  0.5464774   0.752726
## residual_deviance 31.024128  5.0281153  21.930616  31.969517  31.357307
## rmse              5.5322576 0.45730403  4.6830134   5.654159  5.5997596
## rmsle                   0.0        NaN        NaN        NaN        NaN
##                   cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                5.4299264  3.6811585   4.252129  4.5538936  4.5449796
## mse                 45.35922  18.635963   31.33096   37.94935   32.63026
## r2                 0.6590757 0.81730515 0.76446944 0.48357728 0.76171446
## residual_deviance   45.35922  18.635963   31.33096   37.94935   32.63026
## rmse               6.7349253   4.316939  5.5974064   6.160304  5.7122903
## rmsle                    NaN        NaN        NaN        NaN        NaN
##                   cv_9_valid cv_10_valid
## mae                4.4289503    4.259334
## mse                31.531193   27.546896
## r2                 0.7212148   0.7827988
## residual_deviance  31.531193   27.546896
## rmse               5.6152644   5.2485137
## rmsle                    NaN         NaN
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:26  1.181 sec               0              
## 2  2017-07-04 13:51:26  1.198 sec               1       7.88609
## 3  2017-07-04 13:51:26  1.215 sec               2       7.86147
## 4  2017-07-04 13:51:27  1.231 sec               3       7.21911
## 5  2017-07-04 13:51:27  1.245 sec               4       6.99703
## 6  2017-07-04 13:51:27  1.258 sec               5       6.70211
## 7  2017-07-04 13:51:27  1.273 sec               6       6.46323
## 8  2017-07-04 13:51:27  1.287 sec               7       6.25721
## 9  2017-07-04 13:51:27  1.302 sec               8       6.04191
## 10 2017-07-04 13:51:27  1.315 sec               9       5.85544
## 11 2017-07-04 13:51:27  1.328 sec              10       5.76443
##    training_mae training_deviance
## 1                                
## 2       6.14891          62.19049
## 3       6.14893          61.80270
## 4       5.77634          52.11562
## 5       5.50970          48.95839
## 6       5.25840          44.91835
## 7       5.07395          41.77339
## 8       4.89873          39.15264
## 9       4.68931          36.50465
## 10      4.52406          34.28619
## 11      4.46568          33.22862
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                       variable relative_importance
## 1                            population_change        93590.648438
## 2                                 Foreign Born        56283.562500
## 3 Median Value of Owner-Occupied Housing Units        41131.621094
## 4                               Edu_batchelors        33128.046875
## 5                                     Hispanic        29202.830078
##   scaled_importance percentage
## 1          1.000000   0.254601
## 2          0.601380   0.153112
## 3          0.439484   0.111893
## 4          0.353967   0.090121
## 5          0.312027   0.079442
## 
## ---
##                               variable relative_importance
## 45                          Households          566.473389
## 46   Nonemployer Establishments - 2013          559.845215
## 47 Private Nonfarm Establishments 2013          510.509491
## 48                      population2010          502.221375
## 49                            Veterans          362.028351
## 50                 Retail Sales - 2007          284.668762
##    scaled_importance percentage
## 45          0.006053   0.001541
## 46          0.005982   0.001523
## 47          0.005455   0.001389
## 48          0.005366   0.001366
## 49          0.003868   0.000985
## 50          0.003042   0.000774
# First 20 rows of the variable-importance table for `Swing_features`.
h2o.varimp(Swing_features)[seq_len(20), ]
## Variable Importances: 
##                                        variable relative_importance
## 1                             population_change        93590.648438
## 2                                  Foreign Born        56283.562500
## 3  Median Value of Owner-Occupied Housing Units        41131.621094
## 4                                Edu_batchelors        33128.046875
## 5                                      Hispanic        29202.830078
## 6                                Edu_highschool         8948.615234
## 7                                    NonEnglish         8292.908203
## 8                                         Obama         6432.069336
## 9                                        Income         6342.848145
## 10                          Travel Time to Work         6284.697754
## 11                                    age65plus         6031.405762
## 12                               population2014         4929.368652
## 13                                  Evangelical         4168.486816
## 14                                       Romney         3996.504150
## 15                      Land Area (in sq miles)         3870.975586
## 16                                        White         3538.895508
## 17                                      Poverty         3165.208984
## 18                                % Female 2014         3091.719482
## 19                Living in Same House 1+ Years         2697.395752
## 20               Manufacturers Shipments - 2007         2520.426270
##    scaled_importance percentage
## 1           1.000000   0.254601
## 2           0.601380   0.153112
## 3           0.439484   0.111893
## 4           0.353967   0.090121
## 5           0.312027   0.079442
## 6           0.095614   0.024344
## 7           0.088608   0.022560
## 8           0.068726   0.017498
## 9           0.067772   0.017255
## 10          0.067151   0.017097
## 11          0.064445   0.016408
## 12          0.052669   0.013410
## 13          0.044540   0.011340
## 14          0.042702   0.010872
## 15          0.041361   0.010530
## 16          0.037812   0.009627
## 17          0.033820   0.008611
## 18          0.033034   0.008411
## 19          0.028821   0.007338
## 20          0.026930   0.006856
# Plot the variable importances of `Swing_features`, limited to 20 features.
h2o.varimp_plot(Swing_features, num_of_features = 20)

Swing States

Clinton

# Convert `Swing_Features_clinton` into an H2O frame for modelling.
swing.Clinton <- as.h2o(Swing_Features_clinton)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 1 of the frame is the response; columns 2-51 are the predictors.
vars.Clinton <- colnames(swing.Clinton)
y_var.Clinton <- vars.Clinton[1]
x_vars.Clinton <- vars.Clinton[2:51]

# Random forest on `swing.Clinton`: at most 200 trees, 10-fold
# cross-validation, fixed seed. The model is scored after every tree and
# `stopping_rounds = 2` is passed as the stopping setting.
# Note: this reassigns `Clinton_features`, replacing any earlier model
# stored under that name.
Clinton_features <- h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = swing.Clinton,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |=================================================================| 100%
# Print model details: size, training and cross-validation metrics, scoring
# history and variable importances.
summary(Clinton_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_22 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              12                       12               87721        17
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   18.41667        557        593   576.66670
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  11.16537
## RMSE:  3.341463
## MAE:  2.459176
## RMSLE:  0.0896343
## Mean Residual Deviance :  11.16537
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  8.357622
## RMSE:  2.890955
## MAE:  2.17877
## RMSLE:  0.07898867
## Mean Residual Deviance :  8.357622
## 
## 
## Cross-Validation Metrics Summary: 
##                         mean           sd  cv_1_valid cv_2_valid
## mae                2.1764514   0.15296449   2.4029014  2.1700869
## mse                 8.350991    1.6560885   10.382624  7.5849447
## r2                 0.9486032 0.0076983566   0.9301555  0.9521475
## residual_deviance   8.350991    1.6560885   10.382624  7.5849447
## rmse               2.8631918    0.2766986   3.2222078   2.754078
## rmsle             0.07846324 0.0067237546 0.085754074 0.06702775
##                   cv_3_valid cv_4_valid cv_5_valid  cv_6_valid cv_7_valid
## mae                  2.05005  2.1309838  2.2798448   1.8299347  2.3393886
## mse                7.0528684   7.709677   7.804712   5.3534007   13.17103
## r2                 0.9482558  0.9543957  0.9499299   0.9694844  0.9446068
## residual_deviance  7.0528684   7.709677   7.804712   5.3534007   13.17103
## rmse               2.6557238  2.7766306  2.7936914   2.3137417  3.6291914
## rmsle              0.0769472 0.10130545 0.07168021 0.073419444 0.08333611
##                    cv_8_valid  cv_9_valid cv_10_valid
## mae                  2.587893   1.9857495   1.9876814
## mse                  11.32123   6.9654436    6.163981
## r2                  0.9331973  0.94539094   0.9584686
## residual_deviance    11.32123   6.9654436    6.163981
## rmse                3.3647037   2.6392126   2.4827366
## rmsle             0.081611395 0.072677046  0.07087373
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:30  1.965 sec               0              
## 2  2017-07-04 13:51:30  1.985 sec               1       4.76030
## 3  2017-07-04 13:51:30  2.007 sec               2       4.36691
## 4  2017-07-04 13:51:30  2.027 sec               3       4.26996
## 5  2017-07-04 13:51:30  2.049 sec               4       4.08651
## 6  2017-07-04 13:51:30  2.071 sec               5       4.04406
## 7  2017-07-04 13:51:30  2.093 sec               6       3.96838
## 8  2017-07-04 13:51:30  2.114 sec               7       3.72528
## 9  2017-07-04 13:51:30  2.137 sec               8       3.62964
## 10 2017-07-04 13:51:30  2.159 sec               9       3.58004
## 11 2017-07-04 13:51:30  2.182 sec              10       3.57654
## 12 2017-07-04 13:51:30  2.204 sec              11       3.42839
## 13 2017-07-04 13:51:30  2.226 sec              12       3.34146
##    training_mae training_deviance
## 1                                
## 2       3.45215          22.66043
## 3       3.26566          19.06988
## 4       3.19231          18.23253
## 5       3.08619          16.69955
## 6       2.99280          16.35440
## 7       2.91640          15.74806
## 8       2.75213          13.87770
## 9       2.65912          13.17431
## 10      2.60208          12.81665
## 11      2.59165          12.79160
## 12      2.51838          11.75382
## 13      2.45918          11.16537
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                 variable relative_importance
## 1                                  Obama       777059.187500
## 2                                 Romney       204742.046875
## 3 Housing Units in Multi-Unit Structures       161199.359375
## 4                                  White        70074.117188
## 5                         Edu_batchelors        43863.246094
##   scaled_importance percentage
## 1          1.000000   0.540407
## 2          0.263483   0.142388
## 3          0.207448   0.112106
## 4          0.090179   0.048733
## 5          0.056448   0.030505
## 
## ---
##                            variable relative_importance scaled_importance
## 45              Retail Sales - 2007          792.955688          0.001020
## 46            Total Number of Firms          719.299255          0.000926
## 47                   population2014          643.103516          0.000828
## 48 Merchant Wholesaler Sales - 2007          429.022583          0.000552
## 49                       Households          368.605408          0.000474
## 50             Hispanic-Owned Firms          162.402924          0.000209
##    percentage
## 45   0.000551
## 46   0.000500
## 47   0.000447
## 48   0.000298
## 49   0.000256
## 50   0.000113
# First 20 rows of the variable-importance table (listed most important first).
h2o.varimp(Clinton_features)[1:20,]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama       777059.187500
## 2                                        Romney       204742.046875
## 3        Housing Units in Multi-Unit Structures       161199.359375
## 4                                         White        70074.117188
## 5                                Edu_batchelors        43863.246094
## 6                    Private Nonfarm Employment        29157.656250
## 7                                         Black        22736.304688
## 8                                       Density        12924.420898
## 9  Median Value of Owner-Occupied Housing Units        11243.088867
## 10                                 Foreign Born        11206.737305
## 11                            population_change         6721.944824
## 12                            Black-Owned Firms         6536.583496
## 13                           Homeownership Rate         4504.656250
## 14                               Edu_highschool         4246.471680
## 15                                        Women         3736.649414
## 16                             Building Permits         3633.344238
## 17                                     Veterans         3322.478760
## 18                                   NonEnglish         3133.468506
## 19  Accommodation and Food Service Sales - 2007         3039.930908
## 20                Living in Same House 1+ Years         3037.948242
##    scaled_importance percentage
## 1           1.000000   0.540407
## 2           0.263483   0.142388
## 3           0.207448   0.112106
## 4           0.090179   0.048733
## 5           0.056448   0.030505
## 6           0.037523   0.020278
## 7           0.029259   0.015812
## 8           0.016632   0.008988
## 9           0.014469   0.007819
## 10          0.014422   0.007794
## 11          0.008650   0.004675
## 12          0.008412   0.004546
## 13          0.005797   0.003133
## 14          0.005465   0.002953
## 15          0.004809   0.002599
## 16          0.004676   0.002527
## 17          0.004276   0.002311
## 18          0.004032   0.002179
## 19          0.003912   0.002114
## 20          0.003910   0.002113
# Bar chart of the 20 highest-ranked variables of the Clinton model.
h2o.varimp_plot(Clinton_features, num_of_features = 20)

Trump

# Convert the Trump swing-state data frame to an H2O frame for modelling.
swing.Trump <- as.h2o(Swing_Features_Trump)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# The first column of the frame is the response; columns 2-51 are predictors.
vars.Trump <- colnames(swing.Trump)
y_var.Trump <- vars.Trump[1]
x_vars.Trump <- vars.Trump[2:51]

# Random forest regression with 10-fold cross-validation. `ntrees` is only an
# upper bound: the model is scored after every tree and, with
# `stopping_rounds = 2`, training ends early (the summary below reports 11
# trees).
Trump_features <- h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = swing.Trump,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |=================================================================| 100%
# Print model details: size, training and cross-validation metrics, scoring
# history and variable importances.
summary(Trump_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_23 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              11                       11               80086        16
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   18.45455        551        589   574.18180
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  12.14839
## RMSE:  3.485455
## MAE:  2.628891
## RMSLE:  0.08165229
## Mean Residual Deviance :  12.14839
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  8.166665
## RMSE:  2.857738
## MAE:  2.152723
## RMSLE:  0.06808743
## Mean Residual Deviance :  8.166665
## 
## 
## Cross-Validation Metrics Summary: 
##                          mean           sd cv_1_valid cv_2_valid
## mae                 2.1566992   0.15428063  2.2658913  2.2943878
## mse                  8.182738    1.2559689   8.401714  7.5393467
## r2                  0.9511253 0.0067784255  0.9464498  0.9583296
## residual_deviance    8.182738    1.2559689   8.401714  7.5393467
## rmse                2.8428943    0.2243769   2.898571  2.7457871
## rmsle             0.064570345  0.015478144 0.06336895 0.05494856
##                   cv_3_valid cv_4_valid cv_5_valid cv_6_valid  cv_7_valid
## mae                2.1010022  2.1584582  2.1456854  1.6351633    2.353568
## mse                 8.701764   8.695951   7.391761  4.7056117   11.743283
## r2                 0.9384737 0.94882405 0.95611984 0.97479427   0.9508477
## residual_deviance   8.701764   8.695951   7.391761  4.7056117   11.743283
## rmse               2.9498754    2.94889  2.7187793  2.1692421   3.4268475
## rmsle             0.06087396 0.04998338 0.06637724 0.03820809 0.121566564
##                    cv_8_valid  cv_9_valid cv_10_valid
## mae                   2.34893   1.9163048   2.3476014
## mse                  9.737516   6.5373297    8.373104
## r2                 0.94366795   0.9480574   0.9456889
## residual_deviance    9.737516   6.5373297    8.373104
## rmse                3.1204994   2.5568202   2.8936317
## rmsle             0.082081355 0.053765394  0.05452997
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:34  1.788 sec               0              
## 2  2017-07-04 13:51:34  1.813 sec               1       5.08287
## 3  2017-07-04 13:51:34  1.834 sec               2       4.69558
## 4  2017-07-04 13:51:34  1.854 sec               3       4.70470
## 5  2017-07-04 13:51:34  1.875 sec               4       4.34390
## 6  2017-07-04 13:51:34  1.896 sec               5       4.18901
## 7  2017-07-04 13:51:34  1.917 sec               6       3.93378
## 8  2017-07-04 13:51:34  1.939 sec               7       3.81017
## 9  2017-07-04 13:51:34  1.959 sec               8       3.68352
## 10 2017-07-04 13:51:34  1.979 sec               9       3.65731
## 11 2017-07-04 13:51:34  2.002 sec              10       3.60603
## 12 2017-07-04 13:51:34  2.023 sec              11       3.48545
##    training_mae training_deviance
## 1                                
## 2       3.66599          25.83555
## 3       3.43913          22.04844
## 4       3.43246          22.13416
## 5       3.27439          18.86943
## 6       3.13936          17.54784
## 7       2.96050          15.47461
## 8       2.87135          14.51737
## 9       2.79873          13.56832
## 10      2.78048          13.37590
## 11      2.73468          13.00348
## 12      2.62889          12.14839
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                 variable relative_importance
## 1                                  Obama       791054.000000
## 2 Housing Units in Multi-Unit Structures       166076.937500
## 3                                 Romney       134347.890625
## 4                         Edu_batchelors        75131.203125
## 5             Private Nonfarm Employment        29824.814453
##   scaled_importance percentage
## 1          1.000000   0.575958
## 2          0.209944   0.120919
## 3          0.169834   0.097817
## 4          0.094976   0.054702
## 5          0.037703   0.021715
## 
## ---
##                            variable relative_importance scaled_importance
## 45                           Mormon          669.428040          0.000846
## 46                   population2014          503.831940          0.000637
## 47            Total Number of Firms          466.595093          0.000590
## 48               Housing Units 2014          452.721649          0.000572
## 49                       Households          451.557220          0.000571
## 50 Merchant Wholesaler Sales - 2007          347.838165          0.000440
##    percentage
## 45   0.000487
## 46   0.000367
## 47   0.000340
## 48   0.000330
## 49   0.000329
## 50   0.000253
# First 20 rows of the variable-importance table (listed most important first).
h2o.varimp(Trump_features)[1:20,]
## Variable Importances: 
##                                        variable relative_importance
## 1                                         Obama       791054.000000
## 2        Housing Units in Multi-Unit Structures       166076.937500
## 3                                        Romney       134347.890625
## 4                                Edu_batchelors        75131.203125
## 5                    Private Nonfarm Employment        29824.814453
## 6                                         Black        20885.173828
## 7  Median Value of Owner-Occupied Housing Units        17673.820312
## 8                                         White        14739.574219
## 9                                       Density        14615.491211
## 10                                 Foreign Born        12562.568359
## 11                            population_change        11680.703125
## 12                                     Hispanic         8312.496094
## 13                                       Income         4991.144531
## 14                                    age65plus         4984.941895
## 15                Living in Same House 1+ Years         4732.471680
## 16                                   NonEnglish         4547.553711
## 17                               Edu_highschool         4121.343750
## 18                           Homeownership Rate         3489.776367
## 19                                   Protestant         3357.005615
## 20                                     Veterans         3319.221924
##    scaled_importance percentage
## 1           1.000000   0.575958
## 2           0.209944   0.120919
## 3           0.169834   0.097817
## 4           0.094976   0.054702
## 5           0.037703   0.021715
## 6           0.026402   0.015206
## 7           0.022342   0.012868
## 8           0.018633   0.010732
## 9           0.018476   0.010641
## 10          0.015881   0.009147
## 11          0.014766   0.008505
## 12          0.010508   0.006052
## 13          0.006309   0.003634
## 14          0.006302   0.003629
## 15          0.005982   0.003446
## 16          0.005749   0.003311
## 17          0.005210   0.003001
## 18          0.004412   0.002541
## 19          0.004244   0.002444
## 20          0.004196   0.002417
# Bar chart of the 20 highest-ranked variables of the Trump model.
h2o.varimp_plot(Trump_features, num_of_features = 20)

Swing

# Convert the swing-state shift data frame to an H2O frame for modelling.
swing.Swing <- as.h2o(Swing_Features_Shift)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Column 50 of the frame is the response; columns 1-49 and 51 are predictors.
vars.Swing <- colnames(swing.Swing)
y_var.Swing <- vars.Swing[50]
x_vars.Swing <- vars.Swing[c(1:49, 51)]

# Random forest regression with 10-fold cross-validation. `ntrees` is only an
# upper bound: the model is scored after every tree and, with
# `stopping_rounds = 2`, training ends early (the summary below reports 13
# trees).
Swing_features <- h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = swing.Swing,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================================| 100%
# Print model details: size, training and cross-validation metrics, scoring
# history and variable importances.
summary(Swing_features)
## Model Details:
## ==============
## 
## H2ORegressionModel: drf
## Model Key:  DRF_model_R_1499186394342_24 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              13                       13               95195        19
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        20   19.92308        551        593   577.46155
## 
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
## 
## MSE:  26.23519
## RMSE:  5.12203
## MAE:  3.838263
## RMSLE:  NaN
## Mean Residual Deviance :  26.23519
## 
## 
## 
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  20.73719
## RMSE:  4.553811
## MAE:  3.498301
## RMSLE:  NaN
## Mean Residual Deviance :  20.73719
## 
## 
## Cross-Validation Metrics Summary: 
##                         mean          sd cv_1_valid cv_2_valid cv_3_valid
## mae                3.4971883  0.25601858   3.771691   3.336542  3.5872598
## mse                 20.68097   2.9543352  23.686274   17.17209  23.960949
## r2                0.81846476 0.023982769 0.80416083 0.83921343  0.7725684
## residual_deviance   20.68097   2.9543352  23.686274   17.17209  23.960949
## rmse               4.5206423  0.34982938  4.8668547  4.1439223  4.8949924
## rmsle                    0.0         NaN        NaN        NaN        NaN
##                   cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae                3.6994224  3.6790047   2.567361  3.6218374  3.9002304
## mse                21.105486  20.328083  10.849713   23.64021  26.548128
## r2                 0.7882603 0.86201805  0.8884333  0.7895218  0.8213563
## residual_deviance  21.105486  20.328083  10.849713   23.64021  26.548128
## rmse                4.594071  4.5086675  3.2938902  4.8621197  5.1524878
## rmsle                    NaN        NaN        NaN        NaN        NaN
##                   cv_9_valid cv_10_valid
## mae                3.2200727   3.5884612
## mse                19.201723   20.317043
## r2                0.81467414  0.80444115
## residual_deviance  19.201723   20.317043
## rmse                4.381977    4.507443
## rmsle                    NaN         NaN
## 
## Scoring History: 
##              timestamp   duration number_of_trees training_rmse
## 1  2017-07-04 13:51:38  2.326 sec               0              
## 2  2017-07-04 13:51:38  2.351 sec               1       7.20856
## 3  2017-07-04 13:51:38  2.379 sec               2       7.52751
## 4  2017-07-04 13:51:38  2.401 sec               3       7.05333
## 5  2017-07-04 13:51:38  2.421 sec               4       6.82414
## 6  2017-07-04 13:51:38  2.443 sec               5       6.52407
## 7  2017-07-04 13:51:38  2.464 sec               6       6.21454
## 8  2017-07-04 13:51:38  2.486 sec               7       5.96204
## 9  2017-07-04 13:51:38  2.507 sec               8       5.72039
## 10 2017-07-04 13:51:38  2.528 sec               9       5.45401
## 11 2017-07-04 13:51:38  2.550 sec              10       5.35521
## 12 2017-07-04 13:51:38  2.573 sec              11       5.23388
## 13 2017-07-04 13:51:38  2.593 sec              12       5.20277
## 14 2017-07-04 13:51:38  2.614 sec              13       5.12203
##    training_mae training_deviance
## 1                                
## 2       5.51825          51.96337
## 3       5.70884          56.66345
## 4       5.33133          49.74952
## 5       5.10411          46.56883
## 6       4.94928          42.56346
## 7       4.67275          38.62053
## 8       4.54714          35.54592
## 9       4.30118          32.72291
## 10      4.14045          29.74626
## 11      4.06707          28.67831
## 12      3.98612          27.39346
## 13      3.91858          27.06877
## 14      3.83826          26.23519
## 
## Variable Importances: (Extract with `h2o.varimp`) 
## =================================================
## 
## Variable Importances: 
##                                       variable relative_importance
## 1                               Edu_batchelors       289576.156250
## 2                            population_change       197768.937500
## 3                                 Foreign Born        78123.242188
## 4 Median Value of Owner-Occupied Housing Units        60032.285156
## 5                                        White        50424.574219
##   scaled_importance percentage
## 1          1.000000   0.274311
## 2          0.682960   0.187344
## 3          0.269785   0.074005
## 4          0.207311   0.056868
## 5          0.174132   0.047766
## 
## ---
##                            variable relative_importance scaled_importance
## 45 Merchant Wholesaler Sales - 2007         1664.167480          0.005747
## 46               Housing Units 2014         1423.829590          0.004917
## 47                Black-Owned Firms         1282.661987          0.004429
## 48                       Households          959.471252          0.003313
## 49                   population2010          740.479126          0.002557
## 50                   population2014          713.275024          0.002463
##    percentage
## 45   0.001576
## 46   0.001349
## 47   0.001215
## 48   0.000909
## 49   0.000701
## 50   0.000676
# First 20 rows of the variable-importance table (listed most important first).
h2o.varimp(Swing_features)[1:20,]
## Variable Importances: 
##                                        variable relative_importance
## 1                                Edu_batchelors       289576.156250
## 2                             population_change       197768.937500
## 3                                  Foreign Born        78123.242188
## 4  Median Value of Owner-Occupied Housing Units        60032.285156
## 5                                         White        50424.574219
## 6                                         Black        45287.859375
## 7                                       Density        33644.671875
## 8                                        Romney        31501.941406
## 9                                         Obama        30076.775391
## 10                                     Hispanic        19566.589844
## 11                                  Evangelical        15555.184570
## 12                               Edu_highschool        13553.864258
## 13                                       Income        12127.321289
## 14                                     Catholic        11660.565430
## 15                                   NonEnglish        10811.342773
## 16                                % Female 2014        10205.845703
## 17                                    Total_Pop         7789.696777
## 18                                      Poverty         7647.796387
## 19          Private Nonfarm Establishments 2013         7351.964844
## 20                           Homeownership Rate         7218.028320
##    scaled_importance percentage
## 1           1.000000   0.274311
## 2           0.682960   0.187344
## 3           0.269785   0.074005
## 4           0.207311   0.056868
## 5           0.174132   0.047766
## 6           0.156394   0.042901
## 7           0.116186   0.031871
## 8           0.108786   0.029841
## 9           0.103865   0.028491
## 10          0.067570   0.018535
## 11          0.053717   0.014735
## 12          0.046806   0.012839
## 13          0.041880   0.011488
## 14          0.040268   0.011046
## 15          0.037335   0.010241
## 16          0.035244   0.009668
## 17          0.026900   0.007379
## 18          0.026410   0.007245
## 19          0.025389   0.006964
## 20          0.024926   0.006838
# Bar chart of the 20 highest-ranked variables of the swing-state shift model.
h2o.varimp_plot(Swing_features, num_of_features = 20)

Explaining away the Shift in Swing States

# Adjusted R-squared of nested linear models for per_shift, adding one
# predictor at a time. Predictors are identified by their column position in
# Swing_Features_Shift, in the order they enter the model.
# NOTE(review): these appear to be the five top-ranked variables of the
# swing-state importance table above -- confirm the positions still match.
swing_predictor_cols <- c(17, 5, 14, 23, 10)

# Fit per_shift on the first `k` predictors and return the adjusted R-squared.
# The columns are selected by name and modelled with `~ .`, which gives the
# same fit as indexing the data frame inside the formula but keeps readable
# term names and avoids five copy-pasted formulas.
swing_adj_r2 <- function(k) {
  predictors <- names(Swing_Features_Shift)[swing_predictor_cols[seq_len(k)]]
  fit <- lm(per_shift ~ .,
            data = Swing_Features_Shift[, c("per_shift", predictors)])
  summary(fit)$adj.r.squared
}

swing_r2 <- vapply(seq_along(swing_predictor_cols), swing_adj_r2, numeric(1))

# Keep the individual results available under their original names.
Swing_1 <- swing_r2[1]
Swing_2 <- swing_r2[2]
Swing_3 <- swing_r2[3]
Swing_4 <- swing_r2[4]
Swing_5 <- swing_r2[5]

# Prepend 0 so the curve starts at "no features".
theswing <- c(0, swing_r2)
num <- seq_along(theswing) - 1

# The plotted quantity is adjusted R-squared, so label it as such rather than
# as "accuracy".
plot(num,
     theswing,
     col = "blue",
     type = "b",
     main = "Adjusted R-squared for Features Selected",
     xlab = "Number of features",
     ylab = "Adjusted R-squared")

# Report the adjusted R-squared of the single-predictor model (Swing_1).
cat("Swing - Edu_batchelors:", Swing_1)
## Swing - Edu_batchelors: 0.463958

My Model - Revisited

# Hand-picked columns for the revisited model. Per the coefficient table
# printed below these are, in order: population_change, age65plus, White,
# Hispanic, Edu_batchelors, Housing Units in Multi-Unit Structures, Median
# Value of Owner-Occupied Housing Units, Income, Building Permits,
# Evangelical, per_shift (the response, column 50) and Christian.
my_subset_cols <- c(5, 8, 10, 12, 17, 22, 23, 26, 42, 46, 50, 51)
my_subset <- na.omit(Swing_Features_Shift[, my_subset_cols])

# Interaction term.
# NOTE(review): despite its name, this multiplies population_change by
# Edu_batchelors, not by Income -- confirm which was intended before renaming
# the column or changing the product.
my_subset[["IncomeXPopChange"]] <- with(my_subset,
                                        population_change * Edu_batchelors)

# Regress per_shift on every other column of the subset.
my_model <- lm(per_shift ~ ., data = my_subset)
summary(my_model)
## 
## Call:
## lm(formula = per_shift ~ ., data = my_subset)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.2337  -3.4398  -0.1666   3.4990  18.5681 
## 
## Coefficients:
##                                                  Estimate Std. Error
## (Intercept)                                     1.113e+01  1.932e+00
## population_change                              -7.623e-01  1.190e-01
## age65plus                                      -4.233e-02  4.514e-02
## White                                           2.157e-01  1.650e-02
## Hispanic                                       -2.175e-01  2.251e-02
## Edu_batchelors                                 -7.473e-01  4.418e-02
## `Housing Units in Multi-Unit Structures`       -5.966e-03  2.823e-02
## `Median Value of Owner-Occupied Housing Units` -2.250e-05  5.613e-06
## Income                                          2.239e-04  7.200e-05
## `Building Permits`                             -4.364e-04  1.930e-04
## Evangelical                                    -2.693e-01  2.443e-02
## Christian                                       9.522e-02  1.417e-02
## IncomeXPopChange                                2.046e-02  4.347e-03
##                                                t value Pr(>|t|)    
## (Intercept)                                      5.759 1.16e-08 ***
## population_change                               -6.408 2.37e-10 ***
## age65plus                                       -0.938  0.34867    
## White                                           13.071  < 2e-16 ***
## Hispanic                                        -9.662  < 2e-16 ***
## Edu_batchelors                                 -16.916  < 2e-16 ***
## `Housing Units in Multi-Unit Structures`        -0.211  0.83269    
## `Median Value of Owner-Occupied Housing Units`  -4.008 6.63e-05 ***
## Income                                           3.109  0.00194 ** 
## `Building Permits`                              -2.261  0.02400 *  
## Evangelical                                    -11.024  < 2e-16 ***
## Christian                                        6.722 3.17e-11 ***
## IncomeXPopChange                                 4.707 2.90e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.343 on 903 degrees of freedom
## Multiple R-squared:  0.7577, Adjusted R-squared:  0.7544 
## F-statistic: 235.3 on 12 and 903 DF,  p-value: < 2.2e-16

Polarization Measure

Density Plot

# Absolute winning margin per county in each election, and the change between
# the two (positive = the margin widened from 2012 to 2016).
# The three columns are created in this order on purpose: later chunks select
# them from `votes` by position.
votes$margin_2016 <- abs(votes$Trump - votes$Clinton)
votes$margin_2012 <- abs(votes$Obama - votes$Romney)
votes$polarization <- votes$margin_2016 - votes$margin_2012

# Draw the 2016 density, then overlay the 2012 density on the same axes with
# lines(). This replaces par(new = T) plus a second plot(), which redrew the
# axes, labels and title on top of the first plot.
plot(density(votes$margin_2016),
     main = "County Winner Density Plot",
     ylab = "density",
     xlab = "county margin",
     ylim = c(0, .020),
     xlim = c(0, 100),
     col = "red")
lines(density(votes$margin_2012), col = "blue")
legend("topright", legend = c("2012", "2016"), fill = c("blue", "red"))

Plotting Polarization

# Two-column input for the county choropleth: the county identifier (first
# column of `votes`) as "region" and the polarization shift as "value".
# The value column is taken by name rather than by position (113) so the map
# cannot silently switch to another variable if columns are added to `votes`.
polarization <- data.frame(region = votes[[1]],
                           value = votes[["polarization"]])

# Shift <= 0: margin narrowed or unchanged; shift > 0: margin widened.
polarization$value <- cut(polarization$value,
                          breaks = c(-Inf, 0, Inf),
                          labels = c("Less Polarized", "More Polarized"))

# NOTE: the object is still named `c` to preserve the script's existing
# variable; calls such as c("a", "b") keep working because R skips
# non-function bindings when resolving a function call.
c <- CountyChoropleth$new(polarization)
c$title <- "Polarization Shift Measurement"
c$add_state_outline <- TRUE
c$set_num_colors(2)
# The legend title is also passed to the scale itself: a user-supplied
# ggplot_scale appears to replace the default scale that carries c$legend.
c$ggplot_scale <- scale_fill_manual(name = "Polarization Scale",
                                    values = c("darkblue", "red"))
c$legend <- "Polarization Scale"
# Zoom to the 48 contiguous states.
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
county_polarization <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization

2016 Polarization Results

# Two-column input for the county choropleth: the county identifier (first
# column of `votes`) as "region" and the 2016 winning margin as "value".
# The value column is taken by name rather than by position (111) so the map
# cannot silently switch to another variable if columns are added to `votes`.
polarization2016 <- data.frame(region = votes[[1]],
                               value = votes[["margin_2016"]])

# Bin the margin. include.lowest = TRUE closes the first interval at 0, so a
# county with an exactly tied vote (margin 0) lands in "0%-10% Margin"
# instead of becoming NA.
polarization2016$value <- cut(polarization2016$value,
                              breaks = c(0, 10, 20, 50, Inf),
                              labels = c("0%-10% Margin", "10%-20% Margin",
                                         "20%-50% Margin", "50%-100% Margin"),
                              include.lowest = TRUE)

# NOTE: the object is still named `c` to preserve the script's existing
# variable; calls such as c("a", "b") keep working because R skips
# non-function bindings when resolving a function call.
c <- CountyChoropleth$new(polarization2016)
c$title <- "Polarization Measurement - 2016"
c$add_state_outline <- TRUE
c$set_num_colors(4)
# The legend title is also passed to the scale itself: a user-supplied
# ggplot_scale appears to replace the default scale that carries c$legend.
c$ggplot_scale <- scale_fill_manual(name = "Polarization Scale",
                                    values = c("darkblue", "lightblue",
                                               "red", "darkred"))
c$legend <- "Polarization Scale"
# Zoom to the 48 contiguous states.
c$set_zoom(c("maine", "new hampshire", "vermont", "massachusetts",
             "connecticut", "rhode island", "new york", "pennsylvania",
             "new jersey", "maryland", "delaware", "west virginia",
             "virginia", "tennessee", "kentucky", "north carolina",
             "south carolina", "georgia", "florida", "arkansas",
             "mississippi", "alabama", "louisiana", "ohio", "michigan",
             "indiana", "illinois", "wisconsin", "minnesota", "iowa",
             "missouri", "north dakota", "south dakota", "nebraska",
             "kansas", "texas", "oklahoma", "new mexico", "arizona",
             "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
             "california", "oregon", "washington"))
county_polarization2016 <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization2016

# County map of the 2012 margin, binned into four ranges.
# NOTE(review): columns 1 and 112 are presumably FIPS and votes$margin_2012 --
# confirm, and prefer selecting these columns by name.
polarization2012 <- votes[, c(1, 112)]
colnames(polarization2012) <- c("region", "value")
# include.lowest = TRUE keeps a margin of exactly 0 in the first bin; without
# it cut() leaves the lower bound out of (0, 10] and returns NA for that county.
polarization2012$value <- cut(polarization2012$value,
                              breaks = c(0, 10, 20, 50, Inf),
                              labels = c("0%-10% Margin", "10%-20% Margin",
                                         "20%-50% Margin", "50%-100% Margin"),
                              include.lowest = TRUE)


# The chart object is no longer called `c`, so base::c() is not masked.
map_2012 <- CountyChoropleth$new(polarization2012)
map_2012$title <- "Polarization Measurement - 2012"
map_2012$add_state_outline <- TRUE
map_2012$set_num_colors(4)
# Unnamed colours are assigned to the four bins in level order.
map_2012$ggplot_scale <- scale_fill_manual(
  values = c("darkblue", "lightblue", "red", "darkred")
)
map_2012$legend <- "Polarization Scale"
# Zoom to every state except Alaska and Hawaii (the same 48 lower-case state
# names that were previously spelled out by hand).
map_2012$set_zoom(tolower(setdiff(datasets::state.name, c("Alaska", "Hawaii"))))
county_polarization2012 <- map_2012$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization2012

# County map of the 2016 margin split at 10 points.
# NOTE(review): columns 1 and 111 are presumably FIPS and votes$margin_2016 --
# confirm, and prefer selecting these columns by name.
polarization10 <- votes[, c(1, 111)]
colnames(polarization10) <- c("region", "value")
# include.lowest = TRUE keeps a margin of exactly 0 in the "<10% Margin" bin;
# without it cut() leaves the lower bound out of (0, 10] and returns NA.
polarization10$value <- cut(polarization10$value,
                            breaks = c(0, 10, Inf),
                            labels = c("<10% Margin", ">10% Margin"),
                            include.lowest = TRUE)


# The chart object is no longer called `c`, so base::c() is not masked.
map_10_2016 <- CountyChoropleth$new(polarization10)
map_10_2016$title <- "Polarization @ 10% Margin - 2016"
map_10_2016$add_state_outline <- TRUE
map_10_2016$set_num_colors(2)
# Unnamed colours are assigned to the two bins in level order.
map_10_2016$ggplot_scale <- scale_fill_manual(values = c("darkblue", "white"))
map_10_2016$legend <- "Polarization Scale"
# Zoom to every state except Alaska and Hawaii (the same 48 lower-case state
# names that were previously spelled out by hand).
map_10_2016$set_zoom(tolower(setdiff(datasets::state.name, c("Alaska", "Hawaii"))))
county_polarization10 <- map_10_2016$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization10

# County map of the 2012 margin split at 10 points.
# NOTE(review): columns 1 and 112 are presumably FIPS and votes$margin_2012 --
# confirm, and prefer selecting these columns by name.
polarization102 <- votes[, c(1, 112)]
colnames(polarization102) <- c("region", "value")
# include.lowest = TRUE keeps a margin of exactly 0 in the "<10% Margin" bin;
# without it cut() leaves the lower bound out of (0, 10] and returns NA.
polarization102$value <- cut(polarization102$value,
                             breaks = c(0, 10, Inf),
                             labels = c("<10% Margin", ">10% Margin"),
                             include.lowest = TRUE)


# The chart object is no longer called `c`, so base::c() is not masked.
map_10_2012 <- CountyChoropleth$new(polarization102)
map_10_2012$title <- "Polarization @ 10% Margin - 2012"
map_10_2012$add_state_outline <- TRUE
map_10_2012$set_num_colors(2)
# Unnamed colours are assigned to the two bins in level order.
map_10_2012$ggplot_scale <- scale_fill_manual(values = c("darkblue", "white"))
map_10_2012$legend <- "Polarization Scale"
# Zoom to every state except Alaska and Hawaii (the same 48 lower-case state
# names that were previously spelled out by hand).
map_10_2012$set_zoom(tolower(setdiff(datasets::state.name, c("Alaska", "Hawaii"))))
county_polarization102 <- map_10_2012$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization102

# County map of the 2016 margin split at 20 points.
# NOTE(review): columns 1 and 111 are presumably FIPS and votes$margin_2016 --
# confirm, and prefer selecting these columns by name.
polarization20 <- votes[, c(1, 111)]
colnames(polarization20) <- c("region", "value")
# include.lowest = TRUE keeps a margin of exactly 0 in the "<20% Margin" bin;
# without it cut() leaves the lower bound out of (0, 20] and returns NA.
polarization20$value <- cut(polarization20$value,
                            breaks = c(0, 20, Inf),
                            labels = c("<20% Margin", ">20% Margin"),
                            include.lowest = TRUE)


# The chart object is no longer called `c`, so base::c() is not masked.
map_20_2016 <- CountyChoropleth$new(polarization20)
map_20_2016$title <- "Polarization @ 20% Margin - 2016"
map_20_2016$add_state_outline <- TRUE
map_20_2016$set_num_colors(2)
# Unnamed colours are assigned to the two bins in level order.
map_20_2016$ggplot_scale <- scale_fill_manual(values = c("darkblue", "white"))
map_20_2016$legend <- "Polarization Scale"
# Zoom to every state except Alaska and Hawaii (the same 48 lower-case state
# names that were previously spelled out by hand).
map_20_2016$set_zoom(tolower(setdiff(datasets::state.name, c("Alaska", "Hawaii"))))
county_polarization20 <- map_20_2016$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization20

# County map of the 2012 margin split at 20 points.
# NOTE(review): columns 1 and 112 are presumably FIPS and votes$margin_2012 --
# confirm, and prefer selecting these columns by name.
polarization202 <- votes[, c(1, 112)]
colnames(polarization202) <- c("region", "value")
# include.lowest = TRUE keeps a margin of exactly 0 in the "<20% Margin" bin;
# without it cut() leaves the lower bound out of (0, 20] and returns NA.
polarization202$value <- cut(polarization202$value,
                             breaks = c(0, 20, Inf),
                             labels = c("<20% Margin", ">20% Margin"),
                             include.lowest = TRUE)


# The chart object is no longer called `c`, so base::c() is not masked.
map_20_2012 <- CountyChoropleth$new(polarization202)
map_20_2012$title <- "Polarization @ 20% Margin - 2012"
map_20_2012$add_state_outline <- TRUE
map_20_2012$set_num_colors(2)
# Unnamed colours are assigned to the two bins in level order.
map_20_2012$ggplot_scale <- scale_fill_manual(values = c("darkblue", "white"))
map_20_2012$legend <- "Polarization Scale"
# Zoom to every state except Alaska and Hawaii (the same 48 lower-case state
# names that were previously spelled out by hand).
map_20_2012$set_zoom(tolower(setdiff(datasets::state.name, c("Alaska", "Hawaii"))))
county_polarization202 <- map_20_2012$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization202

Change in polarization

# Percentage of the rows of a binned margin table whose `value` equals `bin`.
# Rows with an NA bin stay in the denominator but never count as a match.
pct_in_bin <- function(binned, bin) {
  matches <- sum(binned$value == bin, na.rm = TRUE)
  matches / nrow(binned) * 100
}

# Share of counties decided by less than 10 points in each election; the
# reported figure is the 2012 share minus the 2016 share.
pol_2016 <- pct_in_bin(polarization10, "<10% Margin")
pol_2012 <- pct_in_bin(polarization102, "<10% Margin")
increase_pol10 <- pol_2012 - pol_2016
cat("% Counties Polarization Increase @ 10% Level: ", increase_pol10, "\n")
## % Counties Polarization Increase @ 10% Level:  7.326478
# Same comparison at the 20-point threshold.
pol_20162 <- pct_in_bin(polarization20, "<20% Margin")
pol_20122 <- pct_in_bin(polarization202, "<20% Margin")
increase_pol20 <- pol_20122 - pol_20162
cat("% Counties Polarization Increase @ 20% Level: ", increase_pol20, "\n")
## % Counties Polarization Increase @ 20% Level:  13.81748
# The 2016 shares themselves, reusing the values computed above.
cat("% Counties that were within 20% points: ", pol_20162, "\n")
## % Counties that were within 20% points:  22.01157
cat("% Counties that were within 10% points: ", pol_2016, "\n")
## % Counties that were within 10% points:  10.50771
# Each county's margin scaled by its total votes, stored on `votes` for both years.
votes$AvgPol_2016 <- votes$total_votes_2016 * votes$margin_2016
votes$AvgPol_2012 <- votes$total_votes_2012 * votes$margin_2012

# Vote-weighted average margin per election: sum(votes * margin) / sum(votes).
polarization_2016 <- sum(votes$AvgPol_2016) / sum(votes$total_votes_2016)
cat("Polarization 2016: ", polarization_2016, "\n")
## Polarization 2016:  27.88361
polarization_2012 <- sum(votes$AvgPol_2012) / sum(votes$total_votes_2012)
cat("Polarization 2012:", polarization_2012, "\n")
## Polarization 2012: 24.21305
# Difference between the two vote-weighted averages (2016 minus 2012).
pol_change <- polarization_2016 - polarization_2012
cat("Polarization Change: ", pol_change, "\n")
## Polarization Change:  3.670557