# Load the county-level election results and the religion data set, keep the
# FIPS key plus the 13 religion columns (source columns 43-55), and left-join
# them onto the vote table so every row of `votes` is retained.
# NOTE(review): both paths are absolute and machine-specific.
votes <- read.csv(
  "C:/Users/onest/desktop/2012-and-2016-presidential-elections/votes.csv",
  header = TRUE
)
religion <- read.csv(
  "C:/Users/onest/desktop/2012-and-2016-presidential-elections/religion.csv",
  header = TRUE
)

religions <- religion[, c(1, 43:55)]
colnames(religions) <- c(
  "FIPS", "Total_Pop", "Evangelical", "Protestant", "Historically_Black",
  "Catholic", "Jewish", "Mormon", "Islamic", "Hindu", "Buddhist", "Orthodox",
  "Jehovas_Witnesses", "Other_Religion"
)

votes <- merge(x = votes, y = religions, by = "FIPS", all.x = TRUE)
# Replace the names of selected columns of `votes` with human-readable labels.
# The index vector and the label vector are matched position by position, so
# the labels below are grouped by the run of column indices they belong to.
# NOTE(review): column 41 is renamed "White" although column 34 is already
# called "White" (see the printed listing), so the table ends up with a
# duplicated column name. "Indidan" also looks like a typo for "Indian".
# Both are left as-is here because later code may refer to these names.
colnames(votes)[c(29:31, 33, 36:39, 41:43, 47:54, 56, 58:75)] <- c(
  # columns 29-31
  "Population 2012",
  "Persons Under 5",
  "Persons Under 18",
  # column 33
  "% Female 2014",
  # columns 36-39
  "Indian and Alaskan Native",
  "Asian",
  "Native Hawaiian",
  "2+ Races",
  # columns 41-43
  "White",
  "Living in Same House 1+ Years",
  "Foreign Born",
  # columns 47-54
  "Veterans",
  "Travel Time to Work",
  "Housing Units 2014",
  "Homeownership Rate",
  "Housing Units in Multi-Unit Structures",
  "Median Value of Owner-Occupied Housing Units",
  "Households",
  "Persons/Household",
  # column 56
  "Median Household Income",
  # columns 58-75
  "Private Nonfarm Establishments 2013",
  "Private Nonfarm Employment",
  "% Change - Private Nonfarm Employment",
  "Nonemployer Establishments - 2013",
  "Total Number of Firms",
  "Black-Owned Firms",
  "Indidan and Alaskan -Owned Firms",
  "Asian-Owned Firms",
  "Hawaiian-Owned Firms",
  "Hispanic-Owned Firms",
  "Women",
  "Manufacturers Shipments - 2007",
  "Merchant Wholesaler Sales - 2007",
  "Retail Sales - 2007",
  "Retail Sales / Capita - 2007",
  "Accommodation and Food Service Sales - 2007",
  "Building Permits",
  "Land Area (in sq miles)"
)

# Print the resulting column names for inspection.
colnames(votes)
## [1] "FIPS"
## [2] "X.1"
## [3] "X"
## [4] "combined_fips"
## [5] "votes_dem_2016"
## [6] "votes_gop_2016"
## [7] "total_votes_2016"
## [8] "Clinton"
## [9] "Trump"
## [10] "diff_2016"
## [11] "per_point_diff_2016"
## [12] "state_abbr"
## [13] "county_name"
## [14] "total_votes_2012"
## [15] "votes_dem_2012"
## [16] "votes_gop_2012"
## [17] "county_fips"
## [18] "state_fips"
## [19] "Obama"
## [20] "Romney"
## [21] "diff_2012"
## [22] "per_point_diff_2012"
## [23] "fips"
## [24] "area_name"
## [25] "state_abbreviation"
## [26] "population2014"
## [27] "population2010"
## [28] "population_change"
## [29] "Population 2012"
## [30] "Persons Under 5"
## [31] "Persons Under 18"
## [32] "age65plus"
## [33] "% Female 2014"
## [34] "White"
## [35] "Black"
## [36] "Indian and Alaskan Native"
## [37] "Asian"
## [38] "Native Hawaiian"
## [39] "2+ Races"
## [40] "Hispanic"
## [41] "White"
## [42] "Living in Same House 1+ Years"
## [43] "Foreign Born"
## [44] "NonEnglish"
## [45] "Edu_highschool"
## [46] "Edu_batchelors"
## [47] "Veterans"
## [48] "Travel Time to Work"
## [49] "Housing Units 2014"
## [50] "Homeownership Rate"
## [51] "Housing Units in Multi-Unit Structures"
## [52] "Median Value of Owner-Occupied Housing Units"
## [53] "Households"
## [54] "Persons/Household"
## [55] "Income"
## [56] "Median Household Income"
## [57] "Poverty"
## [58] "Private Nonfarm Establishments 2013"
## [59] "Private Nonfarm Employment"
## [60] "% Change - Private Nonfarm Employment"
## [61] "Nonemployer Establishments - 2013"
## [62] "Total Number of Firms"
## [63] "Black-Owned Firms"
## [64] "Indidan and Alaskan -Owned Firms"
## [65] "Asian-Owned Firms"
## [66] "Hawaiian-Owned Firms"
## [67] "Hispanic-Owned Firms"
## [68] "Women"
## [69] "Manufacturers Shipments - 2007"
## [70] "Merchant Wholesaler Sales - 2007"
## [71] "Retail Sales - 2007"
## [72] "Retail Sales / Capita - 2007"
## [73] "Accommodation and Food Service Sales - 2007"
## [74] "Building Permits"
## [75] "Land Area (in sq miles)"
## [76] "Density"
## [77] "Clinton_Obama"
## [78] "Trump_Romney"
## [79] "Trump_Prediction"
## [80] "Clinton_Prediction"
## [81] "Trump_Deviation"
## [82] "Clinton_Deviation"
## [83] "Total_Pop"
## [84] "Evangelical"
## [85] "Protestant"
## [86] "Historically_Black"
## [87] "Catholic"
## [88] "Jewish"
## [89] "Mormon"
## [90] "Islamic"
## [91] "Hindu"
## [92] "Buddhist"
## [93] "Orthodox"
## [94] "Jehovas_Witnesses"
## [95] "Other_Religion"
# Change in Democratic raw votes per county, 2012 -> 2016, and a categorical
# label for mapping.

# Label a vector of vote-count changes as Gain/Loss with a size band.
#   change - numeric vector of vote differences (2016 minus 2012)
# Returns a character vector: "Equal" for 0, otherwise
# "<Gain|Loss> - <Small (<1000)|Considerable (1000-10000)|Large (>10000)>".
# Bands are closed on the lower bound, so |change| of exactly 1000 is
# "Considerable" and exactly 10000 is "Large". The previous strict
# comparisons let 1000, -1000 and -10000 fall through to "Equal".
# NA input yields NA instead of stopping with an error.
classify_vote_change <- function(change) {
  magnitude <- cut(
    abs(change),
    breaks = c(0, 1000, 10000, Inf),
    labels = c("Small (<1000)", "Considerable (1000-10000)", "Large (>10000)"),
    right = FALSE
  )
  direction <- ifelse(change > 0, "Gain", "Loss")
  ifelse(change == 0, "Equal", paste(direction, "-", magnitude))
}

votes$change_dem_votes <- votes$votes_dem_2016 - votes$votes_dem_2012

# Columns are selected by name rather than by position (1 and 96) so the
# result does not depend on how many columns `votes` currently has.
# `value` is kept as character, as before.
dem_votes <- data.frame(
  region = votes[["FIPS"]],
  votes = votes[["change_dem_votes"]],
  value = classify_vote_change(votes[["change_dem_votes"]]),
  stringsAsFactors = FALSE
)
# ---- Maps: change in Democratic votes (Obama 2012 -> Clinton 2016) ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); when the vector is
#            named, ggplot2 matches colours to categories by name
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# One colour per category, matched by name. The previous per-map colour
# vectors were positional and hand-trimmed to the categories assumed present
# in each region; they were all subsets of this mapping (blue = Democratic
# gain, red = Democratic loss, white = no change).
dem_change_colors <- c(
  "Equal" = "white",
  "Gain - Considerable (1000-10000)" = "blue",
  "Gain - Large (>10000)" = "navy",
  "Gain - Small (<1000)" = "deepskyblue",
  "Loss - Considerable (1000-10000)" = "red",
  "Loss - Large (>10000)" = "darkred",
  "Loss - Small (<1000)" = "lightpink"
)

# State groupings used to zoom the maps.
states_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                        "connecticut", "rhode island")
states_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                         "delaware")
states_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                       "north carolina", "south carolina", "georgia",
                       "florida", "arkansas", "mississippi", "alabama",
                       "louisiana")
states_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                     "minnesota", "iowa", "missouri", "north dakota",
                     "south dakota", "nebraska", "kansas")
states_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
states_west_contiguous <- c("colorado", "wyoming", "montana", "idaho", "utah",
                            "nevada", "california", "oregon", "washington")
states_west <- c(states_west_contiguous, "alaska", "hawaii")
# The national map leaves out Alaska and Hawaii.
states_lower_48 <- c(states_new_england, states_mid_atlantic,
                     states_south_east, states_mid_west, states_south_west,
                     states_west_contiguous)
states_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                  "north carolina", "florida", "arizona", "iowa", "nevada",
                  "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Entire country
dem_change_US <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton where Blue Represents better Clinton Performance",
  dem_change_colors, states_lower_48
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_US

# Break down county vote into regions of the US for easier viewing
## New England Region
dem_change_NE <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - New England",
  dem_change_colors, states_new_england
)
dem_change_NE

## Mid-Atlantic Region
dem_change_MA <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Mid-Atlantic",
  dem_change_colors, states_mid_atlantic
)
dem_change_MA

## South East Region
dem_change_SE <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - South East",
  dem_change_colors, states_south_east
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_SE

## Mid West Region
dem_change_MW <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Mid West",
  dem_change_colors, states_mid_west
)
dem_change_MW

## South West Region
dem_change_SW <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - South West",
  dem_change_colors, states_south_west
)
dem_change_SW

## West Region
dem_change_W <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - West",
  dem_change_colors, states_west
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
dem_change_W

# Explore Vote Count by Swing States
dem_change_swing <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Swing States",
  dem_change_colors, states_swing
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
dem_change_swing
# Change in Republican raw votes per county, 2012 -> 2016, and a categorical
# label for mapping.

# Label a vector of vote-count changes as Gain/Loss with a size band.
#   change - numeric vector of vote differences (2016 minus 2012)
# Returns a character vector: "Equal" for 0, otherwise
# "<Gain|Loss> - <Small (<1000)|Considerable (1000-10000)|Large (>10000)>".
# Bands are closed on the lower bound, so |change| of exactly 1000 is
# "Considerable" and exactly 10000 is "Large". The previous strict
# comparisons let 1000, -1000 and -10000 fall through to "Equal".
# NA input yields NA instead of stopping with an error.
classify_vote_change <- function(change) {
  magnitude <- cut(
    abs(change),
    breaks = c(0, 1000, 10000, Inf),
    labels = c("Small (<1000)", "Considerable (1000-10000)", "Large (>10000)"),
    right = FALSE
  )
  direction <- ifelse(change > 0, "Gain", "Loss")
  ifelse(change == 0, "Equal", paste(direction, "-", magnitude))
}

votes$change_rep_votes <- votes$votes_gop_2016 - votes$votes_gop_2012

# Columns are selected by name rather than by position (1 and 97) so the
# result does not depend on how many columns `votes` currently has.
# `value` is kept as character, as before.
rep_votes <- data.frame(
  region = votes[["FIPS"]],
  votes = votes[["change_rep_votes"]],
  value = classify_vote_change(votes[["change_rep_votes"]]),
  stringsAsFactors = FALSE
)
# ---- Maps: change in Republican votes (Romney 2012 -> Trump 2016) ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); when the vector is
#            named, ggplot2 matches colours to categories by name
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# One colour per category, matched by name. The previous per-map colour
# vectors were positional and hand-trimmed to the categories assumed present
# in each region; they were all subsets of this mapping (red = Republican
# gain, blue = Republican loss, white = no change).
rep_change_colors <- c(
  "Equal" = "white",
  "Gain - Considerable (1000-10000)" = "red",
  "Gain - Large (>10000)" = "darkred",
  "Gain - Small (<1000)" = "lightpink",
  "Loss - Considerable (1000-10000)" = "blue",
  "Loss - Large (>10000)" = "navy",
  "Loss - Small (<1000)" = "deepskyblue"
)

# State groupings used to zoom the maps.
states_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                        "connecticut", "rhode island")
states_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                         "delaware")
states_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                       "north carolina", "south carolina", "georgia",
                       "florida", "arkansas", "mississippi", "alabama",
                       "louisiana")
states_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                     "minnesota", "iowa", "missouri", "north dakota",
                     "south dakota", "nebraska", "kansas")
states_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
states_west_contiguous <- c("colorado", "wyoming", "montana", "idaho", "utah",
                            "nevada", "california", "oregon", "washington")
states_west <- c(states_west_contiguous, "alaska", "hawaii")
# The national map leaves out Alaska and Hawaii.
states_lower_48 <- c(states_new_england, states_mid_atlantic,
                     states_south_east, states_mid_west, states_south_west,
                     states_west_contiguous)
states_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                  "north carolina", "florida", "arizona", "iowa", "nevada",
                  "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Entire country
rep_change_US <- render_county_map(
  rep_votes,
  "Change from Romney to Trump where Red Represents better Trump Performance",
  rep_change_colors, states_lower_48
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_US

# Break down county vote into regions of the US for easier viewing
## New England Region
rep_change_NE <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - New England",
  rep_change_colors, states_new_england
)
rep_change_NE

## Mid-Atlantic Region
rep_change_MA <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Mid-Atlantic",
  rep_change_colors, states_mid_atlantic
)
rep_change_MA

## South East Region
rep_change_SE <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - South East",
  rep_change_colors, states_south_east
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_SE

## Mid West Region
rep_change_MW <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Mid West",
  rep_change_colors, states_mid_west
)
rep_change_MW

## South West Region
rep_change_SW <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - South West",
  rep_change_colors, states_south_west
)
rep_change_SW

## West Region
rep_change_W <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - West",
  rep_change_colors, states_west
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
rep_change_W

# Explore Vote Count by Swing States
rep_change_swing <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Swing States",
  rep_change_colors, states_swing
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
rep_change_swing
# ---- Interesting State Examination: Wisconsin, Texas, Arizona ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); when the vector is
#            named, ggplot2 matches colours to categories by name
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# Colours matched to categories by name. The previous per-state colour
# vectors were positional and trimmed by hand to the categories assumed
# present in each state; they were subsets of these two mappings.
dem_change_colors <- c(
  "Equal" = "white",
  "Gain - Considerable (1000-10000)" = "blue",
  "Gain - Large (>10000)" = "navy",
  "Gain - Small (<1000)" = "deepskyblue",
  "Loss - Considerable (1000-10000)" = "red",
  "Loss - Large (>10000)" = "darkred",
  "Loss - Small (<1000)" = "lightpink"
)
rep_change_colors <- c(
  "Equal" = "white",
  "Gain - Considerable (1000-10000)" = "red",
  "Gain - Large (>10000)" = "darkred",
  "Gain - Small (<1000)" = "lightpink",
  "Loss - Considerable (1000-10000)" = "blue",
  "Loss - Large (>10000)" = "navy",
  "Loss - Small (<1000)" = "deepskyblue"
)

## Wisconsin
# Clinton
dem_change_WI <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Wisconsin",
  dem_change_colors, "wisconsin"
)
dem_change_WI
# Trump
rep_change_WI <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Wisconsin",
  rep_change_colors, "wisconsin"
)
rep_change_WI
# Appear to be more significant losses for Clinton than gains for Trump

## Texas
# Clinton
dem_change_TX <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Texas",
  dem_change_colors, "texas"
)
dem_change_TX
# Trump
rep_change_TX <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Texas",
  rep_change_colors, "texas"
)
rep_change_TX

## Arizona
# Clinton
dem_change_AZ <- render_county_map(
  dem_votes,
  "Change from Obama to Clinton (Blue = Better Clinton) - Arizona",
  dem_change_colors, "arizona"
)
dem_change_AZ
# Trump
rep_change_AZ <- render_county_map(
  rep_votes,
  "Change from Romney to Trump (Red = Better Trump) - Arizona",
  rep_change_colors, "arizona"
)
rep_change_AZ
# Shift toward the Republican candidate per county: the Trump-vs-Romney
# difference minus the Clinton-vs-Obama difference, plus a categorical label
# for mapping.

# Label a vector of shifts by direction and size band.
#   pct_shift - numeric vector; positive values are labelled "GOP",
#               negative values "Dem"
# Returns a character vector: "Equal" for 0, otherwise
# "<GOP|Dem> - <Small (<5%)|Considerable (<10%)|Large (>10%)>".
# Bands are closed on the lower bound, so |shift| of exactly 0.05 is
# "Considerable" and exactly 0.10 is "Large". The previous strict
# comparisons labelled 0.05, -0.05 and -0.10 as "Equal".
# NA input yields NA instead of stopping with an error.
classify_percent_shift <- function(pct_shift) {
  magnitude <- cut(
    abs(pct_shift),
    breaks = c(0, 0.05, 0.10, Inf),
    labels = c("Small (<5%)", "Considerable (<10%)", "Large (>10%)"),
    right = FALSE
  )
  party <- ifelse(pct_shift > 0, "GOP", "Dem")
  ifelse(pct_shift == 0, "Equal", paste(party, "-", magnitude))
}

votes$per_shift <- votes$Trump_Romney - votes$Clinton_Obama
# Selected by name rather than by position (1 and 98).
shift <- votes[, c("FIPS", "per_shift")]
summary(shift$per_shift)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.41210 0.04355 0.10060 0.10620 0.17450 0.46760
ggplot(shift, aes(x = "distribution", y = per_shift)) +
  geom_boxplot(fill = "firebrick", colour = "darkblue") +
  ggtitle("County Shifts toward Republican from 2012 to 2016") +
  ylab("Percentage Shift toward Republican from 2012 to 2016")

# Add the label, then rename the columns for CountyChoropleth
# (the renaming has to come after the boxplot, which uses `per_shift`).
shift$value <- classify_percent_shift(shift$per_shift)
colnames(shift) <- c("region", "shift", "value")
# ---- Maps: percentage shift between the parties, 2012 -> 2016 ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); when the vector is
#            named, ggplot2 matches colours to categories by name
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# One colour per category, matched by name (blue = shift toward Democrats,
# red = shift toward Republicans). The six Dem/GOP colours are the ones the
# positional vector used before; "Equal" is new, so a county with no shift
# gets a colour instead of exhausting a six-colour scale.
shift_colors <- c(
  "Dem - Considerable (<10%)" = "blue",
  "Dem - Large (>10%)" = "navy",
  "Dem - Small (<5%)" = "deepskyblue",
  "Equal" = "white",
  "GOP - Considerable (<10%)" = "red",
  "GOP - Large (>10%)" = "darkred",
  "GOP - Small (<5%)" = "lightpink"
)

# State groupings used to zoom the maps.
states_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                        "connecticut", "rhode island")
states_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                         "delaware")
states_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                       "north carolina", "south carolina", "georgia",
                       "florida", "arkansas", "mississippi", "alabama",
                       "louisiana")
states_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                     "minnesota", "iowa", "missouri", "north dakota",
                     "south dakota", "nebraska", "kansas")
states_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
states_west_contiguous <- c("colorado", "wyoming", "montana", "idaho", "utah",
                            "nevada", "california", "oregon", "washington")
states_west <- c(states_west_contiguous, "alaska", "hawaii")
# The national map leaves out Alaska and Hawaii.
states_lower_48 <- c(states_new_england, states_mid_atlantic,
                     states_south_east, states_mid_west, states_south_west,
                     states_west_contiguous)
states_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                  "north carolina", "florida", "arizona", "iowa", "nevada",
                  "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Entire country
per_change_US <- render_county_map(
  shift, "Shift from 2012 to 2016 by County Percentage",
  shift_colors, states_lower_48
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_US

# Break down county vote into regions of the US for easier viewing
## New England Region
per_change_NE <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - New England",
  shift_colors, states_new_england
)
per_change_NE

## Mid-Atlantic Region
per_change_MA <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - Mid-Atlantic",
  shift_colors, states_mid_atlantic
)
per_change_MA

## South East Region
per_change_SE <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - South East",
  shift_colors, states_south_east
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_SE

## Mid West Region
per_change_MW <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - Mid West",
  shift_colors, states_mid_west
)
per_change_MW

## South West Region
per_change_SW <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - South West",
  shift_colors, states_south_west
)
per_change_SW

## West Region
per_change_W <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - West",
  shift_colors, states_west
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
per_change_W

# Explore Vote Count by Swing States
per_change_swing <- render_county_map(
  shift, "Percentage Shift from 2012 to 2016 - Swing States",
  shift_colors, states_swing
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
per_change_swing
# Combine the per-county Democratic and Republican vote changes and label
# each county with the direction of both, e.g. "Dem Loss/GOP Gain".
total_shift <- merge(x = dem_votes, y = rep_votes, by = "region", all = TRUE)
colnames(total_shift) <- c("region", "dem_votes", "dem_gain_loss",
                           "rep_votes", "rep_gain_loss")

# Direction of a vote-count change: "Gain" (> 0), "Loss" (< 0) or "Equal".
vote_direction <- function(change) {
  ifelse(change > 0, "Gain", ifelse(change < 0, "Loss", "Equal"))
}

# Computed for all rows at once. The previous loop iterated over
# `seq(i:dim(total_shift)[1])`, which reused the index `i` left behind by an
# earlier loop and only covered every row by accident.
# A county missing from either input (possible with all = TRUE) gets NA
# rather than stopping the script with an error.
total_shift$value <- ifelse(
  is.na(total_shift$dem_votes) | is.na(total_shift$rep_votes),
  NA_character_,
  paste0(
    "Dem ", vote_direction(total_shift$dem_votes),
    "/GOP ", vote_direction(total_shift$rep_votes)
  )
)
# ---- Maps: direction of change in total votes for both parties ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); unnamed, so they
#            are assigned to the categories by position
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# NOTE(review): the colour vectors below are kept exactly as they were. They
# are positional, so each one only fits the set of categories present in its
# region, and "lightpink" sits in a different position in the swing-state
# map than in the others. Confirm the intended category-to-colour mapping
# and switch to a single named vector.

# State groupings used to zoom the maps.
states_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                        "connecticut", "rhode island")
states_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                         "delaware")
states_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                       "north carolina", "south carolina", "georgia",
                       "florida", "arkansas", "mississippi", "alabama",
                       "louisiana")
states_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                     "minnesota", "iowa", "missouri", "north dakota",
                     "south dakota", "nebraska", "kansas")
states_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
states_west_contiguous <- c("colorado", "wyoming", "montana", "idaho", "utah",
                            "nevada", "california", "oregon", "washington")
states_west <- c(states_west_contiguous, "alaska", "hawaii")
# The national map leaves out Alaska and Hawaii.
states_lower_48 <- c(states_new_england, states_mid_atlantic,
                     states_south_east, states_mid_west, states_south_west,
                     states_west_contiguous)
states_swing <- c("new hampshire", "pennsylvania", "ohio", "michigan",
                  "north carolina", "florida", "arizona", "iowa", "nevada",
                  "wisconsin", "virginia", "colorado", "minnesota", "maine")

# Entire country
shift_US <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes for Parties",
  c("lightpink", "deepskyblue", "yellow", "navy", "grey", "red", "green"),
  states_lower_48
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_US

# Break down county vote into regions of the US for easier viewing
## New England Region
shift_NE <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - New England",
  c("yellow", "navy", "red", "green"),
  states_new_england
)
shift_NE

## Mid-Atlantic Region
shift_MA <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - Mid-Atlantic",
  c("yellow", "navy", "red", "green"),
  states_mid_atlantic
)
shift_MA

## South East Region
shift_SE <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - South East",
  c("lightpink", "yellow", "navy", "red", "green"),
  states_south_east
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_SE

## Mid West Region
shift_MW <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - Mid West",
  c("yellow", "navy", "red", "green"),
  states_mid_west
)
shift_MW

## South West Region
shift_SW <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - South West",
  c("lightpink", "yellow", "navy", "red", "green"),
  states_south_west
)
shift_SW

## West Region
shift_W <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - West",
  c("lightpink", "yellow", "navy", "grey", "red", "green"),
  states_west
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
shift_W

# Explore Vote Count by Swing States
shift_swing <- render_county_map(
  total_shift, "Shift from 2012 to 2016 by County Total Votes - Swing States",
  c("yellow", "navy", "lightpink", "red", "green"),
  states_swing
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
shift_swing
# Winning margin per county in 2016 and a categorical label for mapping.
# Trump_margin = (Trump - Clinton) * 100; positive values favour Trump.

# Label a vector of margins with the winner and a size band.
#   margin - numeric vector of Trump-minus-Clinton margins
# Returns a character vector "<Trump|Clinton> <band>" where band is
# "(<2%)", "(2% - 5%)", "(5% - 10%)" or "(>10%)"; NA for an exact tie.
# Boundary values are now assigned (exactly 2 -> "2% - 5%", exactly 5 or 10
# -> "5% - 10%"). The previous strict comparisons matched none of the
# branches for those values and left them unlabelled.
# NA input yields NA instead of stopping with an error.
classify_winner_margin <- function(margin) {
  size <- abs(margin)
  band <- ifelse(
    size > 10, "(>10%)",
    ifelse(size >= 5, "(5% - 10%)",
           ifelse(size >= 2, "(2% - 5%)", "(<2%)"))
  )
  winner <- ifelse(margin > 0, "Trump", "Clinton")
  ifelse(margin == 0, NA_character_, paste(winner, band))
}

# Selected by name rather than by position (1, 8 and 9).
county_winner <- votes[, c("FIPS", "Clinton", "Trump")]
county_winner$Trump_margin <-
  (county_winner$Trump - county_winner$Clinton) * 100
# `value` is created explicitly as the fifth column instead of appearing as
# a side effect of the first assignment inside a loop.
county_winner$value <- classify_winner_margin(county_winner$Trump_margin)
colnames(county_winner)[1] <- "region"

plot(density(county_winner$Trump_margin),
     main = "Trump County Margin Density Plot",
     ylab = "density")
# ---- Maps: 2016 county winner and margin ----

# Build one county choropleth of the categorical `value` column of `data`.
#   data   - data frame with `region` and `value` columns
#   title  - plot title
#   colors - fill colours handed to scale_fill_manual(); when the vector is
#            named, ggplot2 matches colours to categories by name
#   zoom   - lower-case state name(s) passed to set_zoom()
#   legend - legend title
# Returns the rendered ggplot with the legend placed on the right.
# A local name is used for the choropleth object instead of a global `c`,
# which shadowed base::c().
render_county_map <- function(data, title, colors, zoom,
                              legend = "Change in Votes") {
  map <- CountyChoropleth$new(data)
  map$title <- title
  map$add_state_outline <- TRUE
  map$legend <- legend
  map$set_num_colors(length(colors))
  map$ggplot_scale <- scale_fill_manual(values = colors)
  map$set_zoom(zoom)
  map$render() + theme(legend.position = "right")
}

# One colour per category, matched by name. The previous positional vector
# relied on how the session's locale sorts "<", ">" and digits.
# NOTE(review): this mapping assumes the ordering "<2%", ">10%", "2% - 5%",
# "5% - 10%" within each candidate, which gives the lightest shade to the
# narrowest margin - confirm against the original plots.
winner_margin_colors <- c(
  "Clinton (<2%)" = "lightblue",
  "Clinton (>10%)" = "navy",
  "Clinton (2% - 5%)" = "dodgerblue1",
  "Clinton (5% - 10%)" = "blue",
  "Trump (<2%)" = "lightpink",
  "Trump (>10%)" = "darkred",
  "Trump (2% - 5%)" = "red",
  "Trump (5% - 10%)" = "firebrick"
)

# State groupings used to zoom the maps.
states_new_england <- c("maine", "new hampshire", "vermont", "massachusetts",
                        "connecticut", "rhode island")
states_mid_atlantic <- c("new york", "pennsylvania", "new jersey", "maryland",
                         "delaware")
states_south_east <- c("west virginia", "virginia", "tennessee", "kentucky",
                       "north carolina", "south carolina", "georgia",
                       "florida", "arkansas", "mississippi", "alabama",
                       "louisiana")
states_mid_west <- c("ohio", "michigan", "indiana", "illinois", "wisconsin",
                     "minnesota", "iowa", "missouri", "north dakota",
                     "south dakota", "nebraska", "kansas")
states_south_west <- c("texas", "oklahoma", "new mexico", "arizona")
states_west_contiguous <- c("colorado", "wyoming", "montana", "idaho", "utah",
                            "nevada", "california", "oregon", "washington")
# The national map leaves out Alaska and Hawaii.
states_lower_48 <- c(states_new_england, states_mid_atlantic,
                     states_south_east, states_mid_west, states_south_west,
                     states_west_contiguous)

# Entire country
county_US <- render_county_map(
  county_winner, "County Winner Margin %",
  winner_margin_colors, states_lower_48,
  legend = "County Winner Margin"
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_US

# Break Down By Region for Easy Viewing
## New England
county_NE <- render_county_map(
  county_winner, "County Winner Margin % - New England",
  winner_margin_colors, states_new_england,
  legend = "County Winner Margin"
)
county_NE

## Mid-Atlantic Region
county_MA <- render_county_map(
  county_winner, "County Winner Margin % - Mid-Atlantic",
  winner_margin_colors, states_mid_atlantic,
  legend = "County Winner Margin"
)
county_MA

## South East Region
county_SE <- render_county_map(
  county_winner, "County Winner Margin % - South East",
  winner_margin_colors, states_south_east,
  legend = "County Winner Margin"
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_SE
##Mid West Region
c = CountyChoropleth$new(county_winner)
c$title = "County Winner Margin % - Mid West"
c$add_state_outline = TRUE
c$legend = "County Winner Margin"
c$set_num_colors(8)
c$ggplot_scale = scale_fill_manual(values = c("lightblue","navy", "dodgerblue1","blue", "lightpink", "darkred","red", "firebrick"))
c$set_zoom(c("ohio","michigan","indiana","illinois","wisconsin","minnesota","iowa","missouri","north dakota","south dakota","nebraska","kansas"))
county_MW = c$render() +
theme(legend.position = "right")
county_MW
##South West Region
c = CountyChoropleth$new(county_winner)
c$title = "County Winner Margin % - South West"
c$add_state_outline = TRUE
c$legend = "County Winner Margin"
c$set_num_colors(8)
c$ggplot_scale = scale_fill_manual(values = c("navy", "dodgerblue1","blue", "lightpink", "darkred","red", "firebrick"))
c$set_zoom(c("texas","oklahoma","new mexico","arizona"))
county_SW = c$render() +
theme(legend.position = "right")
county_SW
##West Region
c = CountyChoropleth$new(county_winner)
c$title = "County Winner Margin % - West"
c$add_state_outline = TRUE
c$legend = "County Winner Margin"
c$set_num_colors(8)
c$ggplot_scale = scale_fill_manual(values = c("lightblue","navy", "dodgerblue1","blue", "lightpink", "darkred","red", "firebrick"))
c$set_zoom(c("colorado","wyoming","montana","idaho","utah","nevada","california","oregon","washington","alaska","hawaii"))
county_W = c$render() +
theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 2050, 2105, 2122, 2150, 2164, 2180, 2188, 2240, 2090, 2198,
## 15005, 2100, 2170, 2016, 2060, 2290, 2282, 2070, 2110, 2130, 2185, 2195,
## 2220, 2230, 2020, 2068, 2013, 2261, 2270, 2275
county_W
#Explore Vote Count by Swing States
c = CountyChoropleth$new(county_winner)
c$title = "County Winner Margin % - Swing States"
c$add_state_outline = TRUE
c$legend = "County Winner Margin"
c$set_num_colors(8)
c$ggplot_scale = scale_fill_manual(values = c("lightblue","navy", "dodgerblue1","blue", "lightpink", "darkred","red", "firebrick"))
c$set_zoom(c("new hampshire","pennsylvania","ohio","michigan","north carolina","florida","arizona","iowa","nevada","wisconsin","virginia","colorado","minnesota","maine"))
county_swing = c$render() +
theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_swing
# Classify each county by whether its winner changed between 2012 and 2016.
#   "OBAMA to TRUMP"    - Trump share > Clinton share and Obama share > Romney share
#   "ROMNEY to CLINTON" - Clinton share > Trump share and Romney share > Obama share
#   "Solid County"      - everything else (including exact ties)
# Columns are addressed by name instead of the positions 8/9/19/20/99, and the
# comparison is vectorised, so a row with a missing share yields NA rather than
# aborting the loop with an error.
share_cols <- c("Clinton", "Trump", "Obama", "Romney")
trump_over_clinton <- votes[["Trump"]] > votes[["Clinton"]]
clinton_over_trump <- votes[["Trump"]] < votes[["Clinton"]]
obama_over_romney <- votes[["Obama"]] > votes[["Romney"]]
romney_over_obama <- votes[["Obama"]] < votes[["Romney"]]

votes$flips <- ifelse(
  trump_over_clinton & obama_over_romney,
  "OBAMA to TRUMP",
  ifelse(
    clinton_over_trump & romney_over_obama,
    "ROMNEY to CLINTON",
    "Solid County"
  )
)
# A county with any missing share cannot be classified.
votes$flips[!complete.cases(votes[, share_cols])] <- NA
# Map of counties that changed party between 2012 and 2016.
# Columns are selected by name rather than by position (1 and 99).
flips <- votes[, c("FIPS", "flips")]
colnames(flips) <- c("region", "value")

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(flips)
choro$title <- "County Flips from 2012 to 2016"
choro$add_state_outline <- TRUE
choro$legend <- "County Status"
choro$set_num_colors(3)
# Colours are keyed by label so that a category missing from the data cannot
# shift the remaining colours onto the wrong category.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "County Status",
  values = c(
    "OBAMA to TRUMP" = "red",
    "ROMNEY to CLINTON" = "blue",
    "Solid County" = "white"
  )
)
choro$set_zoom(contiguous_states)
county_flips <- choro$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_flips
# Number of counties that flipped to Clinton
sum(votes$flips == "ROMNEY to CLINTON", na.rm = TRUE)
## [1] 20
# Number of counties that flipped to Trump
sum(votes$flips == "OBAMA to TRUMP", na.rm = TRUE)
## [1] 218
# Number of counties that did not change
sum(votes$flips == "Solid County", na.rm = TRUE)
## [1] 2874
# Total religious population: county map of column 83 of `votes`, keyed by
# column 1, using choroplethr's default colour scale.
religious <- setNames(votes[, c(1, 83)], c("region", "value"))

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(religious)
choro$title <- "Total Religious Population"
choro$add_state_outline <- TRUE
choro$legend <- "Religious Percentage"
choro$set_zoom(contiguous_states)
religious_map <- choro$render()
county_religious <- religious_map + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_religious
# Evangelical population: county map in five buckets.
# Columns are selected by name rather than by position (1 and 84).
evangelical <- votes[, c("FIPS", "Evangelical")]
colnames(evangelical) <- c("region", "value")
# include.lowest = TRUE keeps a value of exactly 0 in the first bucket; with
# cut()'s defaults the first interval is (0, 1], so 0 would become NA.
evangelical$value <- cut(
  evangelical$value,
  breaks = c(0, 1, 5, 10, 20, Inf),
  include.lowest = TRUE
)

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(evangelical)
choro$title <- "Evangelical Population"
choro$add_state_outline <- TRUE
choro$set_num_colors(5)
# drop = FALSE keeps every bucket on the scale even when no county falls in
# it, so the positional colours cannot slide onto the wrong bucket.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "Evangelical Percentage",
  values = c("white", "darkseagreen1", "greenyellow", "green", "darkgreen"),
  drop = FALSE
)
choro$legend <- "Evangelical Percentage"
choro$set_zoom(contiguous_states)
county_evangelical <- choro$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 31007, 49009, 46075, 46117, 48301, 38087, 49033, 31005,
## 48269, 51515, 48109, 31165, 32029, 30103, 38085, 30039, 48261, 31113, 8014,
## 32009, 31085, 31117, 8047, 49029, 8023, 16071, 16025, 16033, 16041
county_evangelical
# Catholic population: county map in five buckets.
# Columns are selected by name rather than by position (1 and 87).
catholic <- votes[, c("FIPS", "Catholic")]
colnames(catholic) <- c("region", "value")
# include.lowest = TRUE keeps a value of exactly 0 in the first bucket; with
# cut()'s defaults the first interval is (0, 1], so 0 would become NA.
catholic$value <- cut(
  catholic$value,
  breaks = c(0, 1, 5, 10, 20, Inf),
  include.lowest = TRUE
)

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(catholic)
choro$title <- "Catholic Population"
choro$add_state_outline <- TRUE
choro$set_num_colors(5)
# drop = FALSE keeps every bucket on the scale even when no county falls in
# it, so the positional colours cannot slide onto the wrong bucket.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "Catholic Percentage",
  values = c("white", "thistle1", "plum3", "purple", "purple4"),
  drop = FALSE
)
choro$set_zoom(contiguous_states)
choro$legend <- "Catholic Percentage"
county_catholic <- choro$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 28009, 28015, 28021, 28031, 13239, 47025, 47033, 47061,
## 31169, 39105, 47067, 47087, 47095, 47135, 47137, 40029, 48045, 48081,
## 38027, 38039, 18115, 40085, 40099, 40107, 30071, 48403, 48417, 48431,
## 30109, 31007, 35021, 47175, 45061, 31115, 28101, 28119, 28129, 40149,
## 29017, 48447, 49001, 29061, 29067, 29079, 37011, 42023, 17169, 41049,
## 29129, 37095, 37111, 37143, 13125, 49009, 49023, 48033, 29175, 29181,
## 29197, 29203, 13169, 45017, 46063, 46075, 13197, 13201, 13195, 13205,
## 13209, 13213, 13221, 13229, 13231, 13235, 13269, 20187, 20189, 20207,
## 21201, 13243, 22091, 13253, 13265, 13281, 13289, 13295, 13301, 48301,
## 20049, 56017, 56019, 54027, 54059, 56023, 51015, 51025, 51036, 51049,
## 51069, 51081, 51091, 51103, 53059, 54017, 54043, 21007, 21061, 21069,
## 21131, 38087, 49033, 49055, 1041, 1057, 1129, 21165, 51079, 12129, 13011,
## 13019, 13027, 21187, 13033, 13061, 5149, 28023, 1019, 13007, 13065, 28069,
## 22013, 47127, 8079, 47133, 47159, 45069, 46077, 51097, 1035, 1063, 1075,
## 12077, 29185, 30021, 40061, 40069, 21031, 5109, 5117, 13307, 13315, 32011,
## 21063, 32027, 21087, 13105, 21103, 31005, 49017, 48059, 21137, 48075, 1105,
## 1111, 17087, 17151, 48119, 48269, 18005, 13143, 13183, 13193, 13207, 13219,
## 13237, 13249, 37007, 37033, 37073, 51133, 51179, 51515, 13259, 13283,
## 20025, 20033, 37179, 1133, 21203, 21235, 22025, 22083, 48101, 48111, 48125,
## 48345, 48351, 51163, 51181, 51183, 48197, 48237, 51640, 51685, 51750,
## 53069, 8113, 18171, 20129, 5021, 21119, 21177, 19051, 19185, 37079, 20021,
## 28055, 28111, 28125, 28131, 47023, 40067, 40105, 40151, 28037, 40129,
## 28061, 31165, 31175, 28161, 29005, 29025, 32015, 30103, 38085, 39163,
## 47073, 47097, 47121, 48407, 48495, 29199, 31015, 31021, 24019, 29063,
## 37015, 37029, 40001, 40007, 40041, 47171, 29227, 37177, 31103, 37131, 5075,
## 5077, 16007, 26083, 18155, 13167, 13177, 13287, 54063, 51071, 51077, 51089,
## 51159, 51175, 51570, 51735, 47169, 5049, 5127, 47015, 38083, 13251, 13263,
## 13273, 21223, 21089, 51021, 51027, 51037, 29211, 8014, 30037, 31137, 31171,
## 8025, 31183, 32009, 32033, 30069, 30107, 31009, 31073, 31085, 31097, 31117,
## 28103, 38007, 12125, 13001, 45005, 45009, 47007, 47057, 13055, 47081,
## 47173, 45049, 46095, 13079, 41069, 48393, 48433, 48009, 48011, 48079,
## 48095, 8053, 48159, 8057, 48247, 48263, 48349, 51530, 8111, 54013, 49029,
## 49031, 51007, 51017, 51063, 51075, 51111, 1029, 1037, 1059, 1067, 1085,
## 5147, 6003, 1119, 1131, 1007, 1011, 6049, 1065, 13149, 13093, 13101, 13119,
## 8061, 12089, 13003, 13005, 13023, 13025, 13037, 13053, 12041, 1079, 8081,
## 8103, 5025, 5073, 5081, 5099, 13321, 5013, 16071, 16077, 5101, 5111, 17047,
## 5129, 16025, 16033, 16041, 13309, 16065, 13313, 13141, 13155, 13159, 13171,
## 13319, 20099, 22081, 21139, 21153, 21159, 13181, 19075, 16051, 20017, 13211
# Display the Catholic population map stored in county_catholic.
county_catholic
# Mormon population: county map in five buckets.
# Columns are selected by name rather than by position (1 and 89).
mormon <- votes[, c("FIPS", "Mormon")]
colnames(mormon) <- c("region", "value")
# include.lowest = TRUE keeps a value of exactly 0 in the first bucket; with
# cut()'s defaults the first interval is (0, 1], so 0 would become NA.
mormon$value <- cut(
  mormon$value,
  breaks = c(0, 1, 5, 10, 20, Inf),
  include.lowest = TRUE
)

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(mormon)
choro$title <- "Mormon Population"
choro$add_state_outline <- TRUE
choro$legend <- "Mormon Percentage"
choro$set_num_colors(5)
# drop = FALSE keeps every bucket on the scale even when no county falls in
# it, so the positional colours cannot slide onto the wrong bucket.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "Mormon Percentage",
  values = c("white", "lightpink", "lightpink3", "firebrick1", "firebrick4"),
  drop = FALSE
)
choro$set_zoom(contiguous_states)
county_mormon <- choro$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 1001, 27097, 27099, 27111, 27119, 27129, 27133, 19059, 27141,
## 27149, 27157, 27163, 28005, 17005, 28009, 28011, 28015, 28021, 19149,
## 28045, 28051, 28053, 17009, 28067, 28073, 13239, 27077, 45031, 45035,
## 45037, 47017, 47021, 47025, 47033, 47041, 47047, 23015, 28041, 31149,
## 31151, 17035, 31169, 31181, 32005, 32013, 39105, 39107, 39117, 39125,
## 39129, 39139, 39149, 39161, 47067, 47071, 47077, 47083, 47087, 17065,
## 47095, 47101, 47107, 47109, 47111, 47119, 47135, 47137, 1107, 18051, 18059,
## 33019, 39167, 39175, 40003, 40011, 40025, 40029, 40035, 40045, 40055,
## 48045, 48055, 48065, 48073, 48077, 48081, 48087, 24047, 25007, 25015,
## 34033, 34041, 30055, 37195, 37199, 38005, 38009, 38027, 38029, 38037,
## 38039, 38043, 38047, 38053, 18115, 38059, 38063, 40085, 40091, 40099,
## 18125, 40101, 40107, 26003, 18131, 30091, 30097, 1121, 18139, 38065, 38067,
## 38071, 38075, 38081, 38089, 38093, 38097, 39001, 18153, 39011, 39013,
## 39019, 48369, 18173, 48385, 48387, 48399, 48403, 48417, 48419, 48427,
## 18179, 48431, 26019, 26027, 30109, 31007, 31011, 31013, 31019, 31025,
## 31029, 31039, 31051, 31057, 31071, 19009, 39033, 35019, 35021, 35025,
## 35033, 47143, 47147, 47155, 47161, 47167, 19021, 47175, 45055, 45061,
## 45065, 45071, 45073, 26041, 19031, 17131, 13191, 31079, 31083, 31087,
## 31099, 19039, 31101, 31105, 31115, 31119, 28097, 28101, 28119, 28129,
## 36013, 40137, 40145, 17079, 40149, 41013, 41021, 26087, 26093, 26095,
## 28141, 28143, 17103, 28155, 28157, 28163, 29017, 29023, 29035, 29041,
## 29045, 17107, 36041, 36051, 36073, 48435, 48437, 48447, 48483, 17129,
## 48489, 26071, 26131, 26135, 17139, 29051, 29055, 29057, 29067, 29073,
## 17145, 29087, 17155, 36093, 36099, 42023, 17169, 45089, 46033, 46039,
## 46045, 46051, 17175, 46059, 26159, 27011, 27017, 27025, 27041, 27043,
## 27059, 37037, 17189, 37059, 37069, 37075, 37077, 17203, 42029, 42049,
## 42067, 42073, 27063, 27065, 27069, 29111, 29125, 29129, 29139, 29141,
## 29153, 29157, 37095, 37115, 18033, 37121, 37139, 37143, 37153, 13125,
## 42109, 42119, 13129, 48017, 48023, 48033, 13131, 48035, 29171, 29186,
## 29197, 29207, 37169, 39071, 39077, 44001, 44003, 45001, 45017, 45023,
## 46063, 46065, 46069, 46075, 46079, 46085, 46097, 46101, 46115, 46117,
## 46127, 30005, 30019, 31129, 13197, 31133, 19015, 19019, 19023, 19027,
## 13201, 17091, 17093, 17099, 17101, 17117, 17125, 17127, 13189, 13195,
## 13199, 13205, 13209, 13213, 13221, 13229, 13231, 13225, 13235, 13241,
## 13247, 13257, 13267, 13269, 20183, 20187, 20197, 20207, 21191, 21201,
## 21205, 21207, 21221, 13243, 21233, 21237, 21239, 22005, 22091, 22093,
## 22099, 22113, 13253, 21013, 21015, 21027, 17137, 13265, 17147, 17153,
## 17191, 13279, 13281, 13289, 13301, 20041, 20047, 13285, 20053, 20063,
## 20073, 20077, 20087, 22023, 22031, 22035, 22041, 22043, 13303, 21039,
## 21041, 21057, 21065, 21079, 21085, 20027, 21091, 21097, 48267, 48271,
## 48281, 48285, 48287, 48291, 48293, 48295, 48301, 48305, 48317, 48327,
## 48331, 20039, 48333, 51810, 51840, 53003, 53017, 20049, 53023, 51600,
## 55107, 55111, 55119, 55125, 20065, 56017, 56019, 54025, 54033, 20075,
## 54045, 54051, 54053, 54059, 54065, 54069, 54075, 20085, 54087, 54091,
## 54093, 54099, 54103, 54105, 55011, 55013, 20095, 55019, 55021, 55023,
## 50003, 20105, 50011, 51005, 51011, 51019, 20115, 51031, 51033, 51036,
## 51045, 51049, 51051, 54081, 20127, 51069, 51081, 51091, 51095, 51103,
## 51119, 50025, 20135, 54017, 54029, 54043, 54055, 54073, 20145, 54095,
## 51115, 22111, 21001, 21005, 21007, 21019, 21037, 21049, 21061, 21069,
## 21081, 21109, 21125, 21131, 40037, 20163, 37187, 38003, 38013, 38025,
## 38041, 38055, 38069, 38087, 38095, 39021, 54109, 55027, 50013, 20185,
## 51009, 51023, 51035, 51053, 51065, 1027, 20195, 1115, 1129, 21165, 21181,
## 19043, 20203, 19073, 19081, 19091, 19115, 19131, 19147, 19157, 20205,
## 19165, 19187, 19197, 35059, 36077, 51079, 12129, 13011, 21187, 13033,
## 13039, 13061, 21189, 6091, 8011, 8019, 20023, 27071, 27079, 27087, 27101,
## 27107, 27125, 27131, 27143, 27159, 27169, 28007, 28035, 28043, 36113,
## 42025, 42053, 21219, 42075, 42099, 1013, 1019, 21229, 8049, 12121, 12133,
## 22001, 13007, 13035, 13047, 13065, 13075, 28057, 28069, 22119, 22125,
## 22013, 25003, 45033, 47051, 47075, 47099, 47127, 13085, 13099, 13117, 8055,
## 8065, 8079, 8089, 12007, 22077, 26059, 26113, 26119, 27013, 27023, 27033,
## 47133, 47159, 47165, 22105, 45059, 45069, 45087, 46025, 46037, 46049,
## 46057, 46067, 46077, 51097, 51109, 1035, 1039, 1053, 1063, 21003, 1075,
## 12047, 12067, 12077, 5011, 5037, 5055, 5065, 5079, 29115, 29121, 29163,
## 29173, 29185, 29223, 46091, 21025, 46107, 46119, 46129, 40057, 40061,
## 40069, 40075, 40093, 40103, 5141, 21031, 40133, 40141, 5117, 5135, 13307,
## 13311, 13315, 21053, 17017, 31123, 31127, 31143, 31167, 31179, 31185,
## 32011, 21063, 32027, 33017, 21077, 48363, 48373, 48405, 48413, 48421,
## 48455, 13083, 21087, 13111, 8071, 5095, 17025, 17039, 21099, 17061, 17069,
## 18049, 18075, 18093, 18113, 21103, 18119, 30049, 30075, 31001, 31005,
## 31017, 31027, 31041, 31059, 31069, 31077, 48493, 21121, 47183, 48019,
## 48059, 5145, 21137, 48075, 48089, 1105, 1111, 21143, 19003, 19025, 19035,
## 17071, 17075, 17087, 17123, 17151, 17199, 31107, 28077, 28081, 21157,
## 28107, 28123, 28133, 28145, 28159, 29003, 29039, 29059, 48107, 48119,
## 48131, 48143, 48153, 48163, 21175, 48173, 48175, 48219, 48255, 48269,
## 48279, 48289, 8033, 19047, 18027, 13127, 13133, 13163, 13183, 13193, 13207,
## 13219, 13227, 13237, 13249, 29083, 29093, 19063, 37003, 37007, 37009,
## 37073, 37085, 19067, 37123, 48299, 48335, 48343, 51127, 19079, 51515,
## 51620, 51680, 51720, 51740, 13259, 13271, 13283, 20025, 20031, 20033,
## 19095, 20069, 20081, 20093, 20101, 20111, 20123, 20133, 20139, 20151,
## 19105, 20159, 37157, 37173, 39069, 39115, 39127, 39137, 39165, 40005,
## 19121, 55053, 55067, 55078, 55099, 55121, 55135, 20179, 21197, 19137,
## 21203, 21215, 22007, 22021, 22025, 22047, 22059, 22067, 22083, 48101,
## 48105, 48109, 48111, 48125, 48133, 19151, 48145, 48151, 48193, 48345,
## 48351, 19161, 51137, 51139, 51169, 51171, 51181, 55035, 55037, 55041,
## 55043, 55047, 55049, 55057, 53043, 48197, 19181, 48211, 48235, 48237,
## 48239, 48243, 19195, 48253, 48259, 51191, 51195, 51520, 51610, 20005,
## 51640, 51678, 51683, 51685, 51710, 51750, 51790, 55075, 20015, 55077,
## 55083, 55091, 55093, 55097, 53069, 27073, 54005, 54009, 54011, 54015,
## 54019, 8093, 8107, 8113, 8115, 5125, 18171, 27085, 19005, 17059, 27093,
## 20129, 20137, 20153, 20175, 22065, 5021, 21119, 21127, 21133, 21149, 21155,
## 19051, 19065, 27113, 19071, 19083, 19097, 19109, 18111, 19119, 19133,
## 19143, 19167, 27121, 19173, 19185, 19191, 20001, 20007, 22071, 27075,
## 27123, 27127, 27151, 27155, 27165, 27167, 27173, 28019, 28027, 27147,
## 19117, 37053, 37079, 20021, 28055, 28063, 27153, 22087, 38021, 38023,
## 38033, 38049, 28105, 27161, 28111, 28113, 28125, 35051, 42043, 28003,
## 45029, 6043, 28013, 47023, 38051, 40067, 40077, 40089, 40095, 40105, 28017,
## 31139, 19037, 17083, 17085, 28037, 47055, 23025, 40129, 26001, 41025,
## 26089, 26105, 28147, 28061, 47029, 31165, 31175, 26011, 28065, 30079,
## 26109, 28149, 28161, 29005, 29025, 39095, 45011, 22121, 32029, 39111,
## 39123, 30105, 38085, 38099, 29031, 41055, 23009, 41063, 29113, 39163,
## 23029, 47089, 47091, 47097, 47115, 47121, 47123, 47131, 48377, 48391,
## 48407, 36079, 48443, 48461, 48473, 48479, 48495, 48501, 48007, 29199,
## 26013, 31015, 31021, 26153, 46071, 46081, 46089, 46105, 46109, 34001,
## 24029, 31043, 31049, 31061, 29069, 29081, 29089, 37015, 37023, 37029,
## 29133, 29143, 29149, 29155, 37113, 24039, 40001, 40007, 40021, 40023,
## 40033, 37189, 48051, 24041, 47153, 47163, 47171, 47181, 36097, 36105,
## 36115, 46009, 46021, 29209, 26009, 29227, 37177, 39067, 39073, 48071,
## 26015, 45077, 26053, 46047, 26023, 46053, 27007, 27015, 27021, 27027,
## 46135, 30025, 30033, 30045, 25005, 25019, 26037, 31091, 31095, 31103,
## 28093, 27031, 27049, 27055, 37131, 37137, 37149, 42103, 34025, 26057,
## 35005, 38001, 5041, 5057, 5075, 5077, 17013, 17021, 17027, 18023, 26083,
## 5105, 5107, 18123, 18147, 18155, 18163, 26085, 18039, 13145, 13167, 13177,
## 13179, 17161, 17171, 17173, 17181, 17193, 26097, 13287, 13291, 21105,
## 48311, 48315, 48337, 54071, 51077, 51083, 51089, 51093, 51105, 51117,
## 48261, 48155, 48161, 51131, 51149, 51159, 1021, 26117, 51165, 51175, 51177,
## 550
# Display the Mormon population map stored in county_mormon.
county_mormon
# Jewish population: county map in five buckets (note the finer breaks at
# 1, 2, 5 and 10 compared with the other religion maps).
# Columns are selected by name rather than by position (1 and 88).
jewish <- votes[, c("FIPS", "Jewish")]
colnames(jewish) <- c("region", "value")
# include.lowest = TRUE keeps a value of exactly 0 in the first bucket; with
# cut()'s defaults the first interval is (0, 1], so 0 would become NA.
jewish$value <- cut(
  jewish$value,
  breaks = c(0, 1, 2, 5, 10, Inf),
  include.lowest = TRUE
)

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(jewish)
choro$title <- "Jewish Population"
choro$add_state_outline <- TRUE
choro$legend <- "Jewish Percentage"
choro$set_num_colors(5)
# drop = FALSE keeps every bucket on the scale even when no county falls in
# it, so the positional colours cannot slide onto the wrong bucket.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "Jewish Percentage",
  values = c("white", "cyan", "cyan3", "blue", "darkblue"),
  drop = FALSE
)
choro$set_zoom(contiguous_states)
county_jewish <- choro$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 1001, 1009, 1099, 16067, 27091, 27095, 27097, 27099, 27105,
## 27111, 27115, 27117, 27119, 16081, 27129, 27133, 19059, 27141, 27145,
## 27149, 27157, 27163, 27171, 28005, 17005, 28009, 28015, 28021, 28031,
## 28039, 19149, 28045, 28051, 28053, 17009, 28059, 28067, 28073, 23007,
## 13239, 27077, 45031, 45035, 45037, 47011, 47017, 47021, 47025, 47033,
## 47035, 47041, 47047, 47049, 47059, 17029, 47061, 23015, 23021, 23023,
## 23031, 28041, 31149, 31151, 31155, 17035, 31161, 31169, 31173, 31181,
## 32005, 32007, 32013, 32017, 32023, 17049, 39105, 39107, 39117, 39125,
## 39129, 39133, 39135, 39141, 17055, 39149, 39157, 39161, 47067, 47071,
## 47077, 47083, 47087, 17065, 47095, 47101, 47107, 47109, 47111, 47119,
## 47125, 47135, 47137, 24011, 1107, 18051, 24021, 24023, 32510, 33003, 33009,
## 18059, 33019, 39167, 39171, 39175, 40003, 40011, 18069, 40013, 40017,
## 40025, 40029, 40035, 40039, 40045, 40049, 40055, 37193, 18079, 47141,
## 48045, 48049, 48055, 48065, 48067, 48073, 48077, 48081, 18085, 48087,
## 48091, 18095, 34033, 35007, 30055, 30057, 37195, 37199, 38005, 38009,
## 38027, 38029, 38037, 38039, 38043, 38047, 38053, 18115, 38059, 38063,
## 47001, 47003, 40065, 40083, 40085, 40091, 40099, 18125, 40107, 40111,
## 40117, 40119, 40125, 26003, 26005, 18131, 26007, 30063, 30065, 30071,
## 30077, 30083, 30087, 30091, 30097, 30101, 1121, 18139, 38065, 38067, 38071,
## 38075, 38081, 38089, 38093, 38097, 38101, 39001, 18153, 39005, 39011,
## 39013, 39019, 48361, 48369, 48383, 18173, 48385, 48387, 48395, 48399,
## 48403, 48415, 48417, 48419, 48427, 18179, 48431, 26019, 26027, 26031,
## 26033, 30109, 31007, 31011, 18183, 31013, 31019, 31025, 31029, 31033,
## 31039, 31045, 31051, 31057, 31071, 19009, 31075, 39025, 39033, 39037,
## 39041, 35019, 19017, 35021, 35025, 35033, 35039, 47143, 47147, 47149,
## 47155, 47161, 47167, 19021, 47175, 47177, 45055, 45061, 45065, 45071,
## 45073, 26041, 26043, 19031, 26051, 26069, 17131, 13191, 31079, 31081,
## 31083, 31087, 31099, 19039, 31101, 31105, 31115, 31119, 28091, 28097,
## 28101, 28109, 28119, 17073, 28121, 28129, 35041, 35057, 35061, 17077,
## 36033, 40131, 40135, 40137, 40145, 17079, 40149, 41003, 41005, 41011,
## 41013, 41021, 41027, 26079, 26087, 26093, 26095, 26099, 26101, 28137,
## 28141, 28143, 28153, 17103, 28155, 28157, 28163, 29007, 29017, 29023,
## 29029, 29035, 29041, 29045, 17107, 36041, 36045, 36051, 36073, 36075,
## 48435, 17121, 48437, 48447, 48451, 48457, 48467, 48477, 48483, 17129,
## 48485, 48489, 48499, 49001, 49007, 26071, 26131, 26135, 26145, 17139,
## 26147, 26157, 29047, 29055, 29057, 29061, 29067, 29073, 17145, 29079,
## 29087, 37011, 37013, 37019, 37025, 13223, 36089, 17155, 36099, 36107,
## 36117, 42023, 17169, 45089, 46005, 46011, 46019, 46023, 46033, 46039,
## 46041, 46045, 46051, 17175, 46059, 26159, 27001, 27003, 27011, 27017,
## 27025, 27035, 17179, 27041, 27043, 27045, 27059, 37035, 37037, 37039,
## 37041, 17189, 37059, 37061, 37065, 37069, 37075, 37077, 37083, 37087,
## 17203, 42061, 42067, 18009, 41033, 41049, 41059, 41065, 41071, 18011,
## 27063, 27065, 27069, 29105, 29111, 29119, 29125, 29129, 29139, 29141,
## 18021, 29147, 29153, 29157, 29161, 29167, 37095, 37099, 37105, 37111,
## 37115, 18033, 37121, 37135, 37139, 37143, 37153, 13125, 42109, 42111,
## 42115, 42119, 49009, 49013, 13129, 49019, 49023, 48001, 48013, 48015,
## 48017, 48023, 48025, 48033, 13131, 48035, 48039, 29171, 29175, 29177,
## 29181, 29186, 29187, 29197, 29203, 13139, 29207, 29213, 29225, 30003,
## 37159, 37163, 37169, 37175, 13157, 39071, 39077, 39087, 13169, 45001,
## 45007, 45017, 45023, 45025, 46063, 46065, 46069, 13175, 46075, 46079,
## 46085, 46093, 46097, 46101, 46113, 46115, 46117, 46121, 13187, 46127,
## 30005, 30009, 30015, 30019, 30023, 30029, 30035, 30041, 31129, 13197,
## 31133, 31141, 19007, 19011, 19015, 19019, 19023, 19027, 19033, 17081,
## 13201, 17091, 17093, 17101, 17105, 17109, 17117, 17125, 17127, 13217,
## 13185, 13189, 13195, 13199, 13205, 13209, 13213, 13221, 13229, 13231,
## 13225, 13235, 13241, 13247, 13255, 13257, 13267, 13269, 20183, 20187,
## 20189, 13233, 20197, 20207, 21191, 21193, 21201, 21205, 21207, 21217,
## 21221, 21231, 13243, 21233, 21237, 21239, 22005, 22091, 22093, 22097,
## 22099, 22113, 13253, 22117, 21011, 21013, 21015, 21027, 21029, 17133,
## 17137, 17141, 13265, 17147, 17153, 17159, 17165, 17177, 17191, 13277,
## 18001, 13275, 13279, 13281, 13289, 13295, 13301, 20035, 20041, 20047,
## 13285, 20051, 20053, 20057, 20063, 20073, 20077, 20079, 20087, 22011,
## 22015, 13293, 22023, 22031, 22035, 22041, 22043, 22049, 21035, 13303,
## 21039, 21041, 21045, 21057, 21065, 21071, 21079, 21083, 21085, 20027,
## 21091, 21097, 21101, 21107, 48267, 48271, 48277, 48281, 48285, 48287,
## 20029, 48291, 48293, 48295, 48301, 48305, 48307, 48317, 48323, 48327,
## 48331, 20039, 48333, 51800, 53001, 53003, 53015, 53017, 53019, 20049,
## 53023, 53025, 51600, 55107, 55111, 55113, 55119, 55123, 55125, 20065,
## 56003, 56009, 56017, 56019, 54023, 54025, 54027, 54033, 20075, 54037,
## 54045, 54051, 54059, 54065, 54075, 48341, 54083, 20085, 54087, 54091,
## 54093, 54097, 54099, 54103, 54105, 55011, 55013, 20095, 55019, 55021,
## 55023, 53029, 49039, 49045, 49049, 20105, 50011, 50015, 56023, 51005,
## 51011, 51013, 51015, 51019, 20115, 51025, 51031, 51033, 51036, 51045,
## 51049, 51051, 51061, 51067, 20127, 51069, 51073, 51081, 51085, 51091,
## 51095, 51103, 49037, 51119, 20135, 53037, 53039, 53059, 53075, 54029,
## 54043, 54073, 54085, 20145, 54095, 51057, 51115, 22095, 22111, 21001,
## 21005, 21007, 21019, 21037, 20155, 21049, 21061, 21069, 21081, 21093,
## 21109, 21125, 21131, 40037, 40047, 20163, 37187, 38003, 38013, 38025,
## 38041, 38055, 38069, 38087, 38095, 39021, 54109, 55015, 55027, 49027,
## 49033, 49041, 49055, 50013, 20185, 51003, 51009, 51023, 51035, 51065,
## 1003, 1027, 1041, 1057, 20195, 1091, 1103, 1115, 1129, 21147, 21165, 21181,
## 19041, 19043, 20203, 19049, 19073, 19081, 19091, 19099, 19115, 19131,
## 19147, 19157, 20205, 19165, 19187, 19197, 39031, 39045, 35017, 35037,
## 35059, 21183, 36017, 36031, 36049, 51079, 12129, 13011, 13019, 13027,
## 21187, 13033, 13039, 13061, 5149, 6011, 6027, 6063, 21189, 6091, 8011,
## 8019, 8027, 20013, 20023, 27071, 27079, 21199, 27087, 27101, 27107, 27125,
## 27131, 27143, 27159, 27169, 28007, 28023, 21209, 28043, 36101, 42025,
## 42037, 42053, 21219, 42057, 42099, 42117, 42131, 1005, 1013, 1019, 21229,
## 6061, 8041, 8049, 12093, 12121, 12133, 22001, 13007, 13035, 13047, 13065,
## 13075, 28057, 28069, 22119, 22125, 22013, 23017, 24015, 26039, 45021,
## 45033, 47009, 47019, 47051, 47063, 47075, 47085, 1017, 22053, 47099, 47127,
## 6035, 13085, 13099, 13117, 8051, 8055, 8065, 8079, 8089, 8117, 12007,
## 12019, 22077, 12027, 26059, 26063, 26113, 26119, 26137, 26149, 27005,
## 22089, 27013, 27023, 27033, 27047, 27061, 47133, 47145, 47159, 47165,
## 22105, 45047, 45059, 45069, 45087, 46015, 46025, 46037, 46049, 46057,
## 22115, 46067, 46077, 51097, 51109, 1035, 1039, 1049, 1053, 1063, 21003,
## 1075, 1077, 12047, 12067, 12077, 4001, 4007, 5011, 5023, 21009, 5037, 5039,
## 5055, 5065, 5079, 29099, 29115, 29121, 29135, 29145, 21017, 29163, 29173,
## 29185, 29201, 29215, 29223, 30007, 30021, 46091, 21025, 46107, 46119,
## 46129, 40057, 40061, 40069, 40075, 40093, 40103, 40115, 5141, 21031, 40133,
## 40141, 41009, 5109, 5117, 5135, 21043, 13307, 13311, 13315, 16005, 16027,
## 16039, 16063, 16075, 21053, 17017, 30047, 31123, 31127, 31143, 31153,
## 31167, 31179, 31185, 32011, 21063, 32027, 41023, 41061, 42009, 21077,
## 48363, 48373, 48389, 48405, 48413, 48421, 48455, 48471, 13083, 21087,
## 13097, 13105, 13111, 8071, 5095, 17025, 17039, 21099, 17045, 17061, 17069,
## 18049, 18063, 18075, 18093, 18099, 18113, 21103, 18119, 18133, 18145,
## 35011, 30049, 30053, 30075, 30085, 31001, 21115, 31005, 31017, 31027,
## 31041, 31059, 31069, 31077, 31089, 48481, 48493, 21121, 48503, 49005,
## 49017, 47183, 47187, 48003, 48019, 48027, 48041, 48059, 5145, 21137, 48075,
## 48083, 48089, 1105, 1111, 1113, 1123, 21143, 18165, 19003, 19025, 19035,
## 17071, 17075, 17087, 21151, 17123, 17135, 17151, 17163, 17187, 17199,
## 31107, 28077, 28085, 21157, 28107, 28123, 28133, 28145, 28159, 29003,
## 29015, 29027, 29039, 21169, 29049, 29059, 29071, 48107, 48119, 48131,
## 48143, 48153, 48163, 48171, 21175, 48173, 48175, 48187, 48205, 48209,
## 48219, 48231, 48241, 48255, 19045, 48269, 48279, 48289, 8033, 8039, 8045,
## 6093, 6105,
# Display the Jewish population map stored in county_jewish.
county_jewish
# Total Christian population: sum of the Evangelical, Protestant, Catholic,
# Historically_Black and Orthodox columns. `+` propagates NA, so a county
# missing any one of the five components gets an NA total.
votes$Christian <- votes$Evangelical + votes$Protestant + votes$Catholic +
  votes$Historically_Black + votes$Orthodox

# Columns are selected by name rather than by position (1 and 100).
christian <- votes[, c("FIPS", "Christian")]
colnames(christian) <- c("region", "value")
# include.lowest = TRUE keeps a value of exactly 0 in the first bucket; with
# cut()'s defaults the first interval is (0, 10], so 0 would become NA.
christian$value <- cut(
  christian$value,
  breaks = c(0, 10, 20, 30, 40, 50, 60, 70, Inf),
  include.lowest = TRUE
)

contiguous_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

choro <- CountyChoropleth$new(christian)
choro$title <- "Christian Population"
choro$add_state_outline <- TRUE
choro$set_num_colors(8)
# drop = FALSE keeps every bucket on the scale even when no county falls in
# it, so the positional colours cannot slide onto the wrong bucket.
# NOTE(review): choroplethr appears to use `$legend` only for its built-in
# scales, so the title is also given to the custom scale -- confirm.
choro$ggplot_scale <- scale_fill_manual(
  name = "Christian Percentage",
  values = c(
    "white", "yellow", "salmon", "springgreen", "brown1", "deepskyblue",
    "darkmagenta", "darkblue"
  ),
  drop = FALSE
)
choro$legend <- "Christian Percentage"
choro$set_zoom(contiguous_states)
county_christian <- choro$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 16081, 31007, 49009, 46075, 48301, 49033, 48269, 51515, 8014,
## 32009, 31117, 49029, 16025
county_christian
# Rescale the fraction-valued columns by 100 so they are on the same scale as
# the other percentage columns.
# NOTE: not idempotent -- re-running this chunk multiplies by 100 again.
percent_columns <- c(
  "Trump", "Clinton", "Obama", "Romney",
  "White", "Black", "Hispanic",
  "Clinton_Obama", "Trump_Romney", "per_shift"
)
votes[percent_columns] <- votes[percent_columns] * 100
# Stepwise linear models for the 2012 -> 2016 shifts.
# For each response: take a positional subset of `votes`, drop incomplete rows,
# fit an intercept-only and an all-predictor model, then run step() in both
# directions starting from the intercept-only model with the full model as the
# upper scope. Predictions are written back for every row of `votes`.
# NOTE(review): the three index sets differ only in 77 / 78 / 98 -- presumably
# the positions of Clinton_Obama, Trump_Romney and per_shift; confirm against
# colnames(votes).

# Clinton vs. Obama
CO_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:77, 83:95, 100)]
)
null_CO <- lm(Clinton_Obama ~ 1, data = CO_Dev_Predict)
full_CO <- lm(Clinton_Obama ~ ., data = CO_Dev_Predict)
CO_Dev <- step(
  null_CO,
  scope = list(upper = full_CO),
  data = CO_Dev_Predict,
  direction = "both"
)
votes$CO_Dev_Pred <- predict(CO_Dev, newdata = votes)

# Trump vs. Romney
TR_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 78, 83:95, 100)]
)
null_TR <- lm(Trump_Romney ~ 1, data = TR_Dev_Predict)
full_TR <- lm(Trump_Romney ~ ., data = TR_Dev_Predict)
TR_Dev <- step(
  null_TR,
  scope = list(upper = full_TR),
  data = TR_Dev_Predict,
  direction = "both"
)
votes$TR_Dev_Pred <- predict(TR_Dev, newdata = votes)

# Overall shift (per_shift)
Overall_Dev_Predict <- na.omit(
  votes[, c(14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 98, 100)]
)
null_Overall <- lm(per_shift ~ 1, data = Overall_Dev_Predict)
full_Overall <- lm(per_shift ~ ., data = Overall_Dev_Predict)
Overall_Dev <- step(
  null_Overall,
  scope = list(upper = full_Overall),
  data = Overall_Dev_Predict,
  direction = "both"
)
votes$Overall_Dev_Pred <- predict(Overall_Dev, newdata = votes)

summary(CO_Dev)
##
## Call:
## lm(formula = Clinton_Obama ~ `Foreign Born` + Black + Obama +
## Edu_batchelors + NonEnglish + Protestant + `% Female 2014` +
## `Median Value of Owner-Occupied Housing Units` + Income +
## Hispanic + White + votes_gop_2012 + `Median Household Income` +
## `Manufacturers Shipments - 2007` + `Merchant Wholesaler Sales - 2007` +
## `Persons/Household` + `Persons Under 18` + Mormon + Edu_highschool +
## `Hispanic-Owned Firms` + `Travel Time to Work` + population_change +
## Catholic + Density + total_votes_2012 + votes_dem_2012 +
## population2010 + Households + `Private Nonfarm Establishments 2013` +
## `Living in Same House 1+ Years` + `Homeownership Rate` +
## `Building Permits` + `Private Nonfarm Employment` + `Total Number of Firms` +
## `Accommodation and Food Service Sales - 2007`, data = CO_Dev_Predict)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.197 -1.395 0.033 1.332 13.576
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -1.205e+01 1.893e+00
## `Foreign Born` 3.307e-02 1.745e-02
## Black 1.706e-01 6.937e-03
## Obama -2.241e-01 4.136e-03
## Edu_batchelors 3.360e-01 1.067e-02
## NonEnglish 5.391e-02 1.397e-02
## Protestant -2.703e-02 5.351e-03
## `% Female 2014` 2.108e-01 2.287e-02
## `Median Value of Owner-Occupied Housing Units` 1.377e-05 1.148e-06
## Income -7.876e-05 2.546e-05
## Hispanic 7.682e-02 9.086e-03
## White -4.670e-02 6.696e-03
## votes_gop_2012 -3.426e-04 6.895e-05
## `Median Household Income` -2.228e-05 1.158e-05
## `Manufacturers Shipments - 2007` 6.281e-08 1.790e-08
## `Merchant Wholesaler Sales - 2007` -3.791e-08 1.737e-08
## `Persons/Household` 1.661e+00 3.438e-01
## `Persons Under 18` -1.304e-01 2.172e-02
## Mormon 4.633e-02 1.104e-02
## Edu_highschool -3.837e-02 1.201e-02
## `Hispanic-Owned Firms` 3.846e-02 9.685e-03
## `Travel Time to Work` -2.961e-02 1.113e-02
## population_change 2.790e-02 1.257e-02
## Catholic -1.157e-02 5.809e-03
## Density -1.018e-04 3.865e-05
## total_votes_2012 3.509e-04 6.914e-05
## votes_dem_2012 -3.549e-04 6.928e-05
## population2010 -9.648e-06 2.803e-06
## Households 2.445e-05 9.459e-06
## `Private Nonfarm Establishments 2013` -2.103e-04 8.001e-05
## `Living in Same House 1+ Years` 4.256e-02 1.321e-02
## `Homeownership Rate` -2.841e-02 9.159e-03
## `Building Permits` -1.063e-04 6.592e-05
## `Private Nonfarm Employment` 6.774e-06 2.759e-06
## `Total Number of Firms` 2.486e-05 1.686e-05
## `Accommodation and Food Service Sales - 2007` -1.427e-07 9.784e-08
## t value Pr(>|t|)
## (Intercept) -6.366 2.24e-10 ***
## `Foreign Born` 1.895 0.058199 .
## Black 24.588 < 2e-16 ***
## Obama -54.186 < 2e-16 ***
## Edu_batchelors 31.483 < 2e-16 ***
## NonEnglish 3.860 0.000116 ***
## Protestant -5.052 4.64e-07 ***
## `% Female 2014` 9.217 < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units` 11.992 < 2e-16 ***
## Income -3.094 0.001992 **
## Hispanic 8.455 < 2e-16 ***
## White -6.975 3.74e-12 ***
## votes_gop_2012 -4.969 7.10e-07 ***
## `Median Household Income` -1.924 0.054468 .
## `Manufacturers Shipments - 2007` 3.509 0.000456 ***
## `Merchant Wholesaler Sales - 2007` -2.182 0.029171 *
## `Persons/Household` 4.833 1.41e-06 ***
## `Persons Under 18` -6.005 2.13e-09 ***
## Mormon 4.195 2.80e-05 ***
## Edu_highschool -3.195 0.001413 **
## `Hispanic-Owned Firms` 3.971 7.31e-05 ***
## `Travel Time to Work` -2.661 0.007830 **
## population_change 2.220 0.026488 *
## Catholic -1.992 0.046437 *
## Density -2.635 0.008460 **
## total_votes_2012 5.076 4.09e-07 ***
## votes_dem_2012 -5.122 3.21e-07 ***
## population2010 -3.442 0.000585 ***
## Households 2.584 0.009799 **
## `Private Nonfarm Establishments 2013` -2.629 0.008615 **
## `Living in Same House 1+ Years` 3.221 0.001289 **
## `Homeownership Rate` -3.102 0.001941 **
## `Building Permits` -1.613 0.106916
## `Private Nonfarm Employment` 2.456 0.014121 *
## `Total Number of Firms` 1.474 0.140459
## `Accommodation and Food Service Sales - 2007` -1.458 0.144885
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.206 on 3073 degrees of freedom
## Multiple R-squared: 0.804, Adjusted R-squared: 0.8018
## F-statistic: 360.3 on 35 and 3073 DF, p-value: < 2.2e-16
# Residual of the Clinton-Obama model: observed shift minus predicted shift.
votes$model_error_CO <- votes$Clinton_Obama - votes$CO_Dev_Pred

# Select by name instead of by position (previously columns 1 and 104) so the
# map cannot silently pick up a different column if the layout of `votes`
# changes or chunks are run in a different order.
ME_CO <- votes[, c("FIPS", "model_error_CO")]
colnames(ME_CO) <- c("region", "value")

# Six right-closed bins from (-10,-5] up to (10,Inf]; values at or below -10
# fall outside the bins and become NA.
ME_CO$value <- cut(ME_CO$value, breaks = c(-10, -5, -1, 1, 5, 10, Inf))

# `c` shadows base::c() as a variable name; c(...) calls still resolve to the
# function because R skips non-function bindings when evaluating a call.
c <- CountyChoropleth$new(ME_CO)
c$title <- "Model Deviation: Clinton-Obama"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(6)
# One colour per bin in bin order: most negative residuals red, most positive
# dark blue.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "red", "indianred1", "white", "lightcyan1", "dodgerblue", "darkblue"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_CO <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_ME_CO
summary(TR_Dev)
##
## Call:
## lm(formula = Trump_Romney ~ Edu_batchelors + `Persons/Household` +
## Mormon + Romney + Black + Hispanic + Christian + White +
## Income + `Median Value of Owner-Occupied Housing Units` +
## `Travel Time to Work` + `% Female 2014` + votes_gop_2012 +
## `Nonemployer Establishments - 2013` + Edu_highschool + `Land Area (in sq miles)` +
## `Manufacturers Shipments - 2007` + `Persons Under 5` + Other_Religion +
## `Merchant Wholesaler Sales - 2007` + Orthodox + `Median Household Income` +
## Poverty + `Hispanic-Owned Firms` + `Private Nonfarm Employment` +
## Density + Veterans + Jewish + `Housing Units in Multi-Unit Structures` +
## Obama + `Accommodation and Food Service Sales - 2007` + `Black-Owned Firms` +
## `Homeownership Rate` + `% Change - Private Nonfarm Employment`,
## data = TR_Dev_Predict)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.3646 -1.4701 0.0437 1.6070 18.0019
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 4.489e+01 6.852e+00
## Edu_batchelors -4.210e-01 1.315e-02
## `Persons/Household` -2.480e+00 4.041e-01
## Mormon -2.518e-01 1.374e-02
## Romney -2.929e-01 6.622e-02
## Black -1.444e-01 9.071e-03
## Hispanic -1.195e-01 6.662e-03
## Christian 2.357e-02 3.811e-03
## White 4.487e-02 8.625e-03
## Income 1.868e-04 3.063e-05
## `Median Value of Owner-Occupied Housing Units` -2.004e-05 1.322e-06
## `Travel Time to Work` 5.907e-02 1.360e-02
## `% Female 2014` -1.231e-01 2.621e-02
## votes_gop_2012 -1.663e-05 4.492e-06
## `Nonemployer Establishments - 2013` 2.898e-05 5.693e-06
## Edu_highschool -5.161e-02 1.467e-02
## `Land Area (in sq miles)` -1.710e-04 4.487e-05
## `Manufacturers Shipments - 2007` -5.789e-08 1.954e-08
## `Persons Under 5` -2.449e-01 6.499e-02
## Other_Religion 4.432e-02 1.730e-02
## `Merchant Wholesaler Sales - 2007` 7.415e-08 1.927e-08
## Orthodox 6.386e-01 2.859e-01
## `Median Household Income` 5.861e-05 1.502e-05
## Poverty 5.837e-02 1.788e-02
## `Hispanic-Owned Firms` -2.763e-02 1.164e-02
## `Private Nonfarm Employment` -8.215e-06 1.956e-06
## Density 1.256e-04 4.285e-05
## Veterans 2.484e-05 1.231e-05
## Jewish 6.635e-01 2.557e-01
## `Housing Units in Multi-Unit Structures` -3.147e-02 1.198e-02
## Obama -1.031e-01 6.684e-02
## `Accommodation and Food Service Sales - 2007` 2.106e-07 1.211e-07
## `Black-Owned Firms` 1.669e-02 1.032e-02
## `Homeownership Rate` -1.999e-02 1.262e-02
## `% Change - Private Nonfarm Employment` -1.211e-02 8.407e-03
## t value Pr(>|t|)
## (Intercept) 6.550 6.71e-11 ***
## Edu_batchelors -32.004 < 2e-16 ***
## `Persons/Household` -6.136 9.54e-10 ***
## Mormon -18.325 < 2e-16 ***
## Romney -4.423 1.01e-05 ***
## Black -15.920 < 2e-16 ***
## Hispanic -17.940 < 2e-16 ***
## Christian 6.185 7.04e-10 ***
## White 5.203 2.09e-07 ***
## Income 6.099 1.20e-09 ***
## `Median Value of Owner-Occupied Housing Units` -15.156 < 2e-16 ***
## `Travel Time to Work` 4.343 1.45e-05 ***
## `% Female 2014` -4.696 2.77e-06 ***
## votes_gop_2012 -3.702 0.000217 ***
## `Nonemployer Establishments - 2013` 5.090 3.79e-07 ***
## Edu_highschool -3.518 0.000441 ***
## `Land Area (in sq miles)` -3.810 0.000142 ***
## `Manufacturers Shipments - 2007` -2.963 0.003066 **
## `Persons Under 5` -3.768 0.000168 ***
## Other_Religion 2.562 0.010451 *
## `Merchant Wholesaler Sales - 2007` 3.847 0.000122 ***
## Orthodox 2.233 0.025594 *
## `Median Household Income` 3.902 9.75e-05 ***
## Poverty 3.265 0.001107 **
## `Hispanic-Owned Firms` -2.374 0.017678 *
## `Private Nonfarm Employment` -4.199 2.76e-05 ***
## Density 2.930 0.003410 **
## Veterans 2.018 0.043691 *
## Jewish 2.595 0.009512 **
## `Housing Units in Multi-Unit Structures` -2.628 0.008643 **
## Obama -1.543 0.122909
## `Accommodation and Food Service Sales - 2007` 1.739 0.082180 .
## `Black-Owned Firms` 1.618 0.105734
## `Homeownership Rate` -1.584 0.113403
## `% Change - Private Nonfarm Employment` -1.440 0.149921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.713 on 3074 degrees of freedom
## Multiple R-squared: 0.7663, Adjusted R-squared: 0.7637
## F-statistic: 296.4 on 34 and 3074 DF, p-value: < 2.2e-16
# Residual of the Trump-Romney model: observed shift minus predicted shift.
votes$model_error_TR <- votes$Trump_Romney - votes$TR_Dev_Pred

# Select by name instead of by position (previously columns 1 and 105) so the
# map cannot silently pick up a different column if the layout of `votes`
# changes or chunks are run in a different order.
ME_TR <- votes[, c("FIPS", "model_error_TR")]
colnames(ME_TR) <- c("region", "value")

# Seven right-closed bins from (-25,-10] up to (10,Inf]; values at or below
# -25 fall outside the bins and become NA.
ME_TR$value <- cut(ME_TR$value, breaks = c(-25, -10, -5, -1, 1, 5, 10, Inf))

# `c` shadows base::c() as a variable name; c(...) calls still resolve to the
# function because R skips non-function bindings when evaluating a call.
c <- CountyChoropleth$new(ME_TR)
c$title <- "Model Deviation: Trump-Romney"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# One colour per bin in bin order: most negative residuals dark blue, most
# positive dark red.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_TR <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_ME_TR
summary(Overall_Dev)
##
## Call:
## lm(formula = per_shift ~ Edu_batchelors + `Persons/Household` +
## Romney + Black + Hispanic + Mormon + Christian + White +
## `Median Value of Owner-Occupied Housing Units` + `Median Household Income` +
## votes_gop_2012 + `Nonemployer Establishments - 2013` + `% Female 2014` +
## `Travel Time to Work` + Income + `Manufacturers Shipments - 2007` +
## `Merchant Wholesaler Sales - 2007` + `Hispanic-Owned Firms` +
## NonEnglish + `Land Area (in sq miles)` + `Private Nonfarm Employment` +
## Density + Poverty + `Accommodation and Food Service Sales - 2007` +
## Other_Religion + Orthodox + `Homeownership Rate`, data = Overall_Dev_Predict)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.1120 -2.7512 0.0554 2.8877 21.0307
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 6.269e+01 3.142e+00
## Edu_batchelors -7.741e-01 2.015e-02
## `Persons/Household` -3.785e+00 6.466e-01
## Romney -4.155e-01 8.061e-03
## Black -3.153e-01 1.411e-02
## Hispanic -1.862e-01 1.892e-02
## Mormon -2.889e-01 2.334e-02
## Christian 3.595e-02 6.439e-03
## White 8.760e-02 1.430e-02
## `Median Value of Owner-Occupied Housing Units` -3.547e-05 2.159e-06
## `Median Household Income` 9.860e-05 2.498e-05
## votes_gop_2012 -1.997e-05 5.055e-06
## `Nonemployer Establishments - 2013` 5.304e-05 9.717e-06
## `% Female 2014` -3.182e-01 4.291e-02
## `Travel Time to Work` 7.542e-02 2.219e-02
## Income 2.528e-04 5.160e-05
## `Manufacturers Shipments - 2007` -1.233e-07 3.324e-08
## `Merchant Wholesaler Sales - 2007` 1.245e-07 3.016e-08
## `Hispanic-Owned Firms` -6.407e-02 1.994e-02
## NonEnglish -7.185e-02 2.352e-02
## `Land Area (in sq miles)` -1.827e-04 7.431e-05
## `Private Nonfarm Employment` -1.250e-05 3.229e-06
## Density 2.107e-04 6.942e-05
## Poverty 6.461e-02 2.886e-02
## `Accommodation and Food Service Sales - 2007` 4.275e-07 2.040e-07
## Other_Religion 6.023e-02 2.956e-02
## Orthodox 9.071e-01 4.852e-01
## `Homeownership Rate` 2.590e-02 1.619e-02
## t value Pr(>|t|)
## (Intercept) 19.955 < 2e-16 ***
## Edu_batchelors -38.412 < 2e-16 ***
## `Persons/Household` -5.854 5.29e-09 ***
## Romney -51.544 < 2e-16 ***
## Black -22.345 < 2e-16 ***
## Hispanic -9.842 < 2e-16 ***
## Mormon -12.378 < 2e-16 ***
## Christian 5.583 2.57e-08 ***
## White 6.128 1.00e-09 ***
## `Median Value of Owner-Occupied Housing Units` -16.429 < 2e-16 ***
## `Median Household Income` 3.947 8.09e-05 ***
## votes_gop_2012 -3.950 7.99e-05 ***
## `Nonemployer Establishments - 2013` 5.459 5.17e-08 ***
## `% Female 2014` -7.415 1.57e-13 ***
## `Travel Time to Work` 3.398 0.000687 ***
## Income 4.899 1.01e-06 ***
## `Manufacturers Shipments - 2007` -3.710 0.000211 ***
## `Merchant Wholesaler Sales - 2007` 4.128 3.76e-05 ***
## `Hispanic-Owned Firms` -3.213 0.001326 **
## NonEnglish -3.055 0.002272 **
## `Land Area (in sq miles)` -2.458 0.014022 *
## `Private Nonfarm Employment` -3.871 0.000111 ***
## Density 3.035 0.002428 **
## Poverty 2.239 0.025224 *
## `Accommodation and Food Service Sales - 2007` 2.096 0.036164 *
## Other_Religion 2.037 0.041696 *
## Orthodox 1.869 0.061659 .
## `Homeownership Rate` 1.600 0.109715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.663 on 3081 degrees of freedom
## Multiple R-squared: 0.7946, Adjusted R-squared: 0.7928
## F-statistic: 441.6 on 27 and 3081 DF, p-value: < 2.2e-16
# Residual of the overall-shift model: observed per_shift minus predicted.
votes$model_error_overall <- votes$per_shift - votes$Overall_Dev_Pred

# Select by name instead of by position (previously columns 1 and 106) so the
# map cannot silently pick up a different column if the layout of `votes`
# changes or chunks are run in a different order.
ME_Overall <- votes[, c("FIPS", "model_error_overall")]
colnames(ME_Overall) <- c("region", "value")

# Seven right-closed bins from (-30,-10] up to (10,Inf]; values at or below
# -30 fall outside the bins and become NA.
ME_Overall$value <- cut(
  ME_Overall$value,
  breaks = c(-30, -10, -5, -1, 1, 5, 10, Inf)
)

# `c` shadows base::c() as a variable name; c(...) calls still resolve to the
# function because R skips non-function bindings when evaluating a call.
c <- CountyChoropleth$new(ME_Overall)
c$title <- "Model Deviation: 2016 Election Results"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# One colour per bin in bin order: most negative residuals dark blue, most
# positive dark red.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_ME_Overall <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_ME_Overall
# Stepwise linear models for each candidate's 2016 vote share.
# The two modelling frames share every predictor column and differ only in the
# first index: 8 (Clinton) versus 9 (Trump). Rows with any NA are dropped
# before fitting; predictions are written back for every row of `votes`.

# Clinton
Predict_Clinton <- na.omit(
  votes[, c(8, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100)]
)
null_Clinton <- lm(Clinton ~ 1, data = Predict_Clinton)
full_Clinton <- lm(Clinton ~ ., data = Predict_Clinton)
# Start from the intercept-only model and search in both directions, with the
# all-predictor model as the upper scope.
Clinton_Dev <- step(
  null_Clinton,
  scope = list(upper = full_Clinton),
  data = Predict_Clinton,
  direction = "both"
)
votes$Clinton_Percent_Predict <- predict(Clinton_Dev, newdata = votes)

# Trump
Predict_Trump <- na.omit(
  votes[, c(9, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100)]
)
null_Trump <- lm(Trump ~ 1, data = Predict_Trump)
full_Trump <- lm(Trump ~ ., data = Predict_Trump)
Trump_Dev <- step(
  null_Trump,
  scope = list(upper = full_Trump),
  data = Predict_Trump,
  direction = "both"
)
votes$Trump_Percent_Predict <- predict(Trump_Dev, newdata = votes)

# Clinton
summary(Clinton_Dev)
##
## Call:
## lm(formula = Clinton ~ Obama + `Foreign Born` + Black + Edu_batchelors +
## NonEnglish + Protestant + `% Female 2014` + `Median Value of Owner-Occupied Housing Units` +
## Income + Hispanic + White + Veterans + `Nonemployer Establishments - 2013` +
## `Manufacturers Shipments - 2007` + Edu_highschool + `Hispanic-Owned Firms` +
## Mormon + `Persons Under 18` + Density + `Homeownership Rate` +
## `Persons/Household` + `Median Household Income` + `Living in Same House 1+ Years` +
## `Merchant Wholesaler Sales - 2007` + `Private Nonfarm Employment` +
## population_change + `Accommodation and Food Service Sales - 2007` +
## `Travel Time to Work` + Catholic + `Persons Under 5`, data = Predict_Clinton)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1088 -1.3843 0.0161 1.3411 13.5568
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -1.127e+01 1.928e+00
## Obama 7.770e-01 4.073e-03
## `Foreign Born` 3.118e-02 1.725e-02
## Black 1.686e-01 6.925e-03
## Edu_batchelors 3.445e-01 1.054e-02
## NonEnglish 5.385e-02 1.402e-02
## Protestant -2.949e-02 5.330e-03
## `% Female 2014` 2.101e-01 2.300e-02
## `Median Value of Owner-Occupied Housing Units` 1.338e-05 1.108e-06
## Income -7.167e-05 2.523e-05
## Hispanic 7.556e-02 9.076e-03
## White -4.592e-02 6.750e-03
## Veterans 1.770e-05 6.692e-06
## `Nonemployer Establishments - 2013` -2.429e-05 4.745e-06
## `Manufacturers Shipments - 2007` 6.367e-08 1.590e-08
## Edu_highschool -4.245e-02 1.210e-02
## `Hispanic-Owned Firms` 3.479e-02 9.615e-03
## Mormon 4.558e-02 1.110e-02
## `Persons Under 18` -8.695e-02 3.220e-02
## Density -1.292e-04 3.320e-05
## `Homeownership Rate` -3.152e-02 9.264e-03
## `Persons/Household` 1.545e+00 3.418e-01
## `Median Household Income` -2.617e-05 1.150e-05
## `Living in Same House 1+ Years` 3.951e-02 1.336e-02
## `Merchant Wholesaler Sales - 2007` -5.285e-08 1.539e-08
## `Private Nonfarm Employment` 5.876e-06 1.595e-06
## population_change 3.285e-02 1.246e-02
## `Accommodation and Food Service Sales - 2007` -2.008e-07 9.743e-08
## `Travel Time to Work` -2.642e-02 1.112e-02
## Catholic -1.074e-02 5.788e-03
## `Persons Under 5` -1.306e-01 8.055e-02
## t value Pr(>|t|)
## (Intercept) -5.843 5.68e-09 ***
## Obama 190.755 < 2e-16 ***
## `Foreign Born` 1.807 0.070821 .
## Black 24.352 < 2e-16 ***
## Edu_batchelors 32.671 < 2e-16 ***
## NonEnglish 3.840 0.000126 ***
## Protestant -5.532 3.43e-08 ***
## `% Female 2014` 9.132 < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units` 12.071 < 2e-16 ***
## Income -2.841 0.004523 **
## Hispanic 8.326 < 2e-16 ***
## White -6.803 1.23e-11 ***
## Veterans 2.645 0.008213 **
## `Nonemployer Establishments - 2013` -5.120 3.24e-07 ***
## `Manufacturers Shipments - 2007` 4.005 6.36e-05 ***
## Edu_highschool -3.507 0.000460 ***
## `Hispanic-Owned Firms` 3.619 0.000301 ***
## Mormon 4.107 4.11e-05 ***
## `Persons Under 18` -2.700 0.006968 **
## Density -3.891 0.000102 ***
## `Homeownership Rate` -3.402 0.000677 ***
## `Persons/Household` 4.520 6.41e-06 ***
## `Median Household Income` -2.276 0.022889 *
## `Living in Same House 1+ Years` 2.957 0.003130 **
## `Merchant Wholesaler Sales - 2007` -3.435 0.000601 ***
## `Private Nonfarm Employment` 3.684 0.000234 ***
## population_change 2.637 0.008410 **
## `Accommodation and Food Service Sales - 2007` -2.061 0.039420 *
## `Travel Time to Work` -2.377 0.017533 *
## Catholic -1.855 0.063671 .
## `Persons Under 5` -1.622 0.104997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.217 on 3078 degrees of freedom
## Multiple R-squared: 0.9793, Adjusted R-squared: 0.9791
## F-statistic: 4866 on 30 and 3078 DF, p-value: < 2.2e-16
# Clinton vote-share deviation: observed share minus the stepwise model's
# prediction, keyed by county FIPS code (the first column of `votes`).
Clinton_Deviation <- data.frame(
  region = votes$FIPS,
  value = votes$Clinton - votes$Clinton_Percent_Predict
)

# Six right-closed bins from (-10,-5] up to (10,Inf]; values at or below -10
# fall outside the bins and become NA.
Clinton_Deviation$value <- cut(
  Clinton_Deviation$value,
  breaks = c(-10, -5, -1, 1, 5, 10, Inf)
)

# `c` shadows base::c() as a variable name; c(...) calls still resolve to the
# function because R skips non-function bindings when evaluating a call.
c <- CountyChoropleth$new(Clinton_Deviation)
c$title <- "Clinton Percentage Deviation"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
# Was 7; set to 6 to agree with the six bins and six fill colours below, as in
# the Clinton-Obama deviation map.
c$set_num_colors(6)
# One colour per bin in bin order: most negative deviations red, most positive
# dark blue.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "red", "indianred1", "white", "lightcyan1", "dodgerblue", "darkblue"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
# Render once, into the Clinton map variable. The map was previously also
# assigned to `county_Trump_Dev` (copy/paste slip) and then rendered a second
# time; `county_Trump_Dev` is assigned by the Trump deviation chunk instead.
county_Clinton_Dev <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_Clinton_Dev
#Trump
summary(Trump_Dev)
##
## Call:
## lm(formula = Trump ~ Obama + Edu_batchelors + `Persons/Household` +
## Black + Mormon + Hispanic + Romney + Christian + White +
## Income + `Median Value of Owner-Occupied Housing Units` +
## `Travel Time to Work` + `% Female 2014` + Edu_highschool +
## `Nonemployer Establishments - 2013` + `Manufacturers Shipments - 2007` +
## `Land Area (in sq miles)` + `Persons Under 5` + Other_Religion +
## `Hispanic-Owned Firms` + Orthodox + `Merchant Wholesaler Sales - 2007` +
## `Private Nonfarm Employment` + Density + `Accommodation and Food Service Sales - 2007` +
## `Median Household Income` + Poverty + `Housing Units in Multi-Unit Structures` +
## Jewish + `Homeownership Rate` + `Black-Owned Firms` + `% Change - Private Nonfarm Employment`,
## data = Predict_Trump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.899 -1.461 0.013 1.619 18.015
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 4.825e+01 6.799e+00
## Obama -1.217e-01 6.669e-02
## Edu_batchelors -4.294e-01 1.296e-02
## `Persons/Household` -2.591e+00 4.034e-01
## Black -1.464e-01 9.070e-03
## Mormon -2.502e-01 1.375e-02
## Hispanic -1.182e-01 6.654e-03
## Romney 6.852e-01 6.600e-02
## Christian 2.424e-02 3.802e-03
## White 4.403e-02 8.616e-03
## Income 1.892e-04 3.068e-05
## `Median Value of Owner-Occupied Housing Units` -1.938e-05 1.314e-06
## `Travel Time to Work` 5.505e-02 1.347e-02
## `% Female 2014` -1.309e-01 2.614e-02
## Edu_highschool -5.089e-02 1.464e-02
## `Nonemployer Establishments - 2013` 2.827e-05 5.646e-06
## `Manufacturers Shipments - 2007` -6.086e-08 1.935e-08
## `Land Area (in sq miles)` -1.813e-04 4.458e-05
## `Persons Under 5` -2.393e-01 6.508e-02
## Other_Religion 4.307e-02 1.732e-02
## `Hispanic-Owned Firms` -2.987e-02 1.161e-02
## Orthodox 6.061e-01 2.860e-01
## `Merchant Wholesaler Sales - 2007` 6.665e-08 1.748e-08
## `Private Nonfarm Employment` -9.670e-06 1.749e-06
## Density 1.674e-04 3.937e-05
## `Accommodation and Food Service Sales - 2007` 2.676e-07 1.191e-07
## `Median Household Income` 5.470e-05 1.501e-05
## Poverty 5.698e-02 1.788e-02
## `Housing Units in Multi-Unit Structures` -3.918e-02 1.178e-02
## Jewish 5.920e-01 2.555e-01
## `Homeownership Rate` -2.515e-02 1.256e-02
## `Black-Owned Firms` 1.810e-02 1.031e-02
## `% Change - Private Nonfarm Employment` -1.259e-02 8.423e-03
## t value Pr(>|t|)
## (Intercept) 7.096 1.59e-12 ***
## Obama -1.825 0.068057 .
## Edu_batchelors -33.139 < 2e-16 ***
## `Persons/Household` -6.423 1.54e-10 ***
## Black -16.139 < 2e-16 ***
## Mormon -18.196 < 2e-16 ***
## Hispanic -17.763 < 2e-16 ***
## Romney 10.382 < 2e-16 ***
## Christian 6.376 2.09e-10 ***
## White 5.110 3.41e-07 ***
## Income 6.168 7.82e-10 ***
## `Median Value of Owner-Occupied Housing Units` -14.752 < 2e-16 ***
## `Travel Time to Work` 4.086 4.50e-05 ***
## `% Female 2014` -5.008 5.80e-07 ***
## Edu_highschool -3.477 0.000514 ***
## `Nonemployer Establishments - 2013` 5.008 5.81e-07 ***
## `Manufacturers Shipments - 2007` -3.145 0.001677 **
## `Land Area (in sq miles)` -4.068 4.86e-05 ***
## `Persons Under 5` -3.677 0.000240 ***
## Other_Religion 2.487 0.012946 *
## `Hispanic-Owned Firms` -2.573 0.010135 *
## Orthodox 2.119 0.034150 *
## `Merchant Wholesaler Sales - 2007` 3.813 0.000140 ***
## `Private Nonfarm Employment` -5.529 3.49e-08 ***
## Density 4.251 2.19e-05 ***
## `Accommodation and Food Service Sales - 2007` 2.247 0.024742 *
## `Median Household Income` 3.643 0.000274 ***
## Poverty 3.186 0.001455 **
## `Housing Units in Multi-Unit Structures` -3.324 0.000897 ***
## Jewish 2.317 0.020544 *
## `Homeownership Rate` -2.002 0.045351 *
## `Black-Owned Firms` 1.756 0.079248 .
## `% Change - Private Nonfarm Employment` -1.494 0.135179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.719 on 3076 degrees of freedom
## Multiple R-squared: 0.9701, Adjusted R-squared: 0.9698
## F-statistic: 3119 on 32 and 3076 DF, p-value: < 2.2e-16
# Trump vote-share deviation: observed share minus the stepwise model's
# prediction, paired with the first column of `votes` as the region id and
# binned into seven right-closed intervals from (-25,-10] up to (10,Inf].
Trump_Deviation <- data.frame(
  region = votes[[1]],
  value = cut(
    votes$Trump - votes$Trump_Percent_Predict,
    breaks = c(-25, -10, -5, -1, 1, 5, 10, Inf)
  )
)

# `c` shadows base::c() as a variable name; c(...) calls still resolve to the
# function because R skips non-function bindings when evaluating a call.
c <- CountyChoropleth$new(Trump_Deviation)
c$title <- "Trump Percentage Deviation"
c$add_state_outline <- TRUE
c$legend <- "Model Deviation"
c$set_num_colors(7)
# One colour per bin in bin order: most negative deviations dark blue, most
# positive dark red.
c$ggplot_scale <- scale_fill_manual(
  values = c(
    "darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"
  )
)
# Zoom to the 48 listed states (Alaska and Hawaii are not in the list).
c$set_zoom(c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
))
county_Trump_Dev <- c$render() + theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 48301, 51515, 8014, 32009
county_Trump_Dev
# Stepwise linear models for each party's raw 2016 vote count.
# The two modelling frames share every predictor column and differ only in the
# first index: 5 (votes_dem_2016) versus 6 (votes_gop_2016). Rows with any NA
# are dropped before fitting; predictions are written back for every row of
# `votes`.

# Clinton
Predict_Votes_Clinton <- na.omit(
  votes[, c(5, 14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100)]
)
null_Votes_Clinton <- lm(votes_dem_2016 ~ 1, data = Predict_Votes_Clinton)
full_Votes_Clinton <- lm(votes_dem_2016 ~ ., data = Predict_Votes_Clinton)
# Start from the intercept-only model and search in both directions, with the
# all-predictor model as the upper scope.
Clinton_Votes_Dev <- step(
  null_Votes_Clinton,
  scope = list(upper = full_Votes_Clinton),
  data = Predict_Votes_Clinton,
  direction = "both"
)
votes$Clinton_Votes_Predict <- predict(Clinton_Votes_Dev, newdata = votes)

# Trump
Predict_Votes_Trump <- na.omit(
  votes[, c(6, 14:16, 19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:95, 100)]
)
null_Votes_Trump <- lm(votes_gop_2016 ~ 1, data = Predict_Votes_Trump)
full_Votes_Trump <- lm(votes_gop_2016 ~ ., data = Predict_Votes_Trump)
Trump_Votes_Dev <- step(
  null_Votes_Trump,
  scope = list(upper = full_Votes_Trump),
  data = Predict_Votes_Trump,
  direction = "both"
)
votes$Trump_Votes_Predict <- predict(Trump_Votes_Dev, newdata = votes)

summary(Clinton_Votes_Dev)
##
## Call:
## lm(formula = votes_dem_2016 ~ votes_dem_2012 + `Nonemployer Establishments - 2013` +
## `Private Nonfarm Employment` + votes_gop_2012 + `Foreign Born` +
## `Manufacturers Shipments - 2007` + total_votes_2012 + `Housing Units 2014` +
## population2014 + population2010 + `Total Number of Firms` +
## Veterans + age65plus + Households + `Median Value of Owner-Occupied Housing Units` +
## `Housing Units in Multi-Unit Structures` + `Merchant Wholesaler Sales - 2007` +
## Hindu + Buddhist + Obama + Poverty + `Median Household Income` +
## population_change + White + Density + `Hispanic-Owned Firms` +
## `Persons Under 18` + Orthodox + Hispanic + NonEnglish + `Retail Sales - 2007` +
## `Private Nonfarm Establishments 2013` + Protestant, data = Predict_Votes_Clinton)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60622 -577 115 698 67806
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -8.581e+02 1.813e+03
## votes_dem_2012 1.738e+00 1.473e-01
## `Nonemployer Establishments - 2013` -5.240e-01 7.061e-02
## `Private Nonfarm Employment` 2.142e-02 4.849e-03
## votes_gop_2012 7.056e-01 1.459e-01
## `Foreign Born` 1.002e+02 3.153e+01
## `Manufacturers Shipments - 2007` -8.132e-05 3.297e-05
## total_votes_2012 -7.815e-01 1.463e-01
## `Housing Units 2014` -1.449e-01 1.476e-02
## population2014 4.417e-01 1.416e-02
## population2010 -4.341e-01 1.449e-02
## `Total Number of Firms` 6.543e-01 8.445e-02
## Veterans -2.424e-01 2.650e-02
## age65plus 9.401e+01 2.860e+01
## Households 1.656e-01 2.580e-02
## `Median Value of Owner-Occupied Housing Units` 9.597e-03 1.947e-03
## `Housing Units in Multi-Unit Structures` -3.756e+01 1.394e+01
## `Merchant Wholesaler Sales - 2007` -1.335e-04 3.143e-05
## Hindu 2.981e+03 7.133e+02
## Buddhist -2.497e+03 6.305e+02
## Obama -6.072e+01 7.643e+00
## Poverty 1.125e+02 2.450e+01
## `Median Household Income` 6.184e-02 1.672e-02
## population_change -9.928e+01 2.276e+01
## White -3.157e+01 6.946e+00
## Density -3.065e-01 7.859e-02
## `Hispanic-Owned Firms` 5.612e+01 1.759e+01
## `Persons Under 18` -9.458e+01 3.250e+01
## Orthodox -9.151e+02 4.279e+02
## Hispanic -4.545e+01 1.597e+01
## NonEnglish 4.866e+01 2.276e+01
## `Retail Sales - 2007` -3.713e-04 1.599e-04
## `Private Nonfarm Establishments 2013` 2.970e-01 1.730e-01
## Protestant 1.407e+01 9.408e+00
## t value Pr(>|t|)
## (Intercept) -0.473 0.636044
## votes_dem_2012 11.798 < 2e-16 ***
## `Nonemployer Establishments - 2013` -7.421 1.50e-13 ***
## `Private Nonfarm Employment` 4.417 1.04e-05 ***
## votes_gop_2012 4.834 1.40e-06 ***
## `Foreign Born` 3.178 0.001500 **
## `Manufacturers Shipments - 2007` -2.466 0.013703 *
## total_votes_2012 -5.341 9.94e-08 ***
## `Housing Units 2014` -9.821 < 2e-16 ***
## population2014 31.186 < 2e-16 ***
## population2010 -29.948 < 2e-16 ***
## `Total Number of Firms` 7.748 1.26e-14 ***
## Veterans -9.149 < 2e-16 ***
## age65plus 3.288 0.001021 **
## Households 6.418 1.60e-10 ***
## `Median Value of Owner-Occupied Housing Units` 4.930 8.66e-07 ***
## `Housing Units in Multi-Unit Structures` -2.695 0.007081 **
## `Merchant Wholesaler Sales - 2007` -4.247 2.23e-05 ***
## Hindu 4.179 3.01e-05 ***
## Buddhist -3.961 7.62e-05 ***
## Obama -7.944 2.72e-15 ***
## Poverty 4.593 4.54e-06 ***
## `Median Household Income` 3.698 0.000221 ***
## population_change -4.362 1.33e-05 ***
## White -4.545 5.69e-06 ***
## Density -3.901 9.80e-05 ***
## `Hispanic-Owned Firms` 3.190 0.001435 **
## `Persons Under 18` -2.910 0.003634 **
## Orthodox -2.138 0.032560 *
## Hispanic -2.846 0.004460 **
## NonEnglish 2.138 0.032636 *
## `Retail Sales - 2007` -2.321 0.020326 *
## `Private Nonfarm Establishments 2013` 1.717 0.086111 .
## Protestant 1.495 0.134972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4042 on 3075 degrees of freedom
## Multiple R-squared: 0.9969, Adjusted R-squared: 0.9968
## F-statistic: 2.974e+04 on 33 and 3075 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the stepwise-selected Trump
# vote-count model.
summary(Trump_Votes_Dev)
##
## Call:
## lm(formula = votes_gop_2016 ~ votes_gop_2012 + `Private Nonfarm Employment` +
## `Housing Units 2014` + Density + population2014 + `Private Nonfarm Establishments 2013` +
## population2010 + total_votes_2012 + votes_dem_2012 + `Total Number of Firms` +
## `Accommodation and Food Service Sales - 2007` + `Building Permits` +
## Orthodox + Black + Mormon + population_change + `Persons Under 5` +
## Edu_batchelors + Romney + `Median Household Income` + `% Female 2014` +
## age65plus + `Median Value of Owner-Occupied Housing Units` +
## `Foreign Born` + `Persons Under 18` + Veterans + Hispanic +
## Protestant + Catholic + `Hispanic-Owned Firms` + `Manufacturers Shipments - 2007` +
## Women + `Black-Owned Firms` + Income + `Retail Sales - 2007` +
## Jewish, data = Predict_Votes_Trump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54815 -862 -118 659 45629
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.573e+03 1.809e+03
## votes_gop_2012 3.170e+00 1.260e-01
## `Private Nonfarm Employment` -1.158e-01 4.580e-03
## `Housing Units 2014` 7.389e-02 1.001e-02
## Density 3.893e-01 6.653e-02
## population2014 -2.139e-01 1.645e-02
## `Private Nonfarm Establishments 2013` 2.026e+00 1.523e-01
## population2010 2.159e-01 1.696e-02
## total_votes_2012 -2.196e+00 1.258e-01
## votes_dem_2012 2.166e+00 1.273e-01
## `Total Number of Firms` -2.340e-01 3.303e-02
## `Accommodation and Food Service Sales - 2007` 1.694e-03 1.723e-04
## `Building Permits` 1.336e+00 1.927e-01
## Orthodox 1.898e+03 4.000e+02
## Black -2.860e+01 7.420e+00
## Mormon -4.510e+01 1.856e+01
## population_change 1.433e+02 2.183e+01
## `Persons Under 5` -4.508e+02 1.293e+02
## Edu_batchelors -9.937e+01 1.587e+01
## Romney -2.268e+01 6.733e+00
## `Median Household Income` 1.041e-01 1.832e-02
## `% Female 2014` 2.406e+02 4.065e+01
## age65plus -6.880e+01 2.838e+01
## `Median Value of Owner-Occupied Housing Units` -1.056e-02 1.899e-03
## `Foreign Born` 1.176e+02 2.523e+01
## `Persons Under 18` -1.391e+02 5.282e+01
## Veterans 9.149e-02 2.480e-02
## Hispanic -3.934e+01 9.977e+00
## Protestant -2.186e+01 8.846e+00
## Catholic 2.659e+01 9.652e+00
## `Hispanic-Owned Firms` 3.804e+01 1.620e+01
## `Manufacturers Shipments - 2007` 6.798e-05 2.974e-05
## Women 1.236e+01 6.169e+00
## `Black-Owned Firms` -2.780e+01 1.427e+01
## Income -8.685e-02 4.101e-02
## `Retail Sales - 2007` -2.510e-04 1.454e-04
## Jewish 6.189e+02 3.626e+02
## t value Pr(>|t|)
## (Intercept) -1.422 0.155084
## votes_gop_2012 25.160 < 2e-16 ***
## `Private Nonfarm Employment` -25.294 < 2e-16 ***
## `Housing Units 2014` 7.380 2.03e-13 ***
## Density 5.852 5.36e-09 ***
## population2014 -13.004 < 2e-16 ***
## `Private Nonfarm Establishments 2013` 13.301 < 2e-16 ***
## population2010 12.729 < 2e-16 ***
## total_votes_2012 -17.459 < 2e-16 ***
## votes_dem_2012 17.019 < 2e-16 ***
## `Total Number of Firms` -7.086 1.70e-12 ***
## `Accommodation and Food Service Sales - 2007` 9.831 < 2e-16 ***
## `Building Permits` 6.933 5.02e-12 ***
## Orthodox 4.745 2.18e-06 ***
## Black -3.854 0.000119 ***
## Mormon -2.430 0.015148 *
## population_change 6.564 6.13e-11 ***
## `Persons Under 5` -3.485 0.000499 ***
## Edu_batchelors -6.260 4.38e-10 ***
## Romney -3.368 0.000767 ***
## `Median Household Income` 5.681 1.46e-08 ***
## `% Female 2014` 5.920 3.57e-09 ***
## age65plus -2.425 0.015376 *
## `Median Value of Owner-Occupied Housing Units` -5.560 2.93e-08 ***
## `Foreign Born` 4.663 3.25e-06 ***
## `Persons Under 18` -2.633 0.008511 **
## Veterans 3.689 0.000229 ***
## Hispanic -3.943 8.22e-05 ***
## Protestant -2.471 0.013531 *
## Catholic 2.755 0.005904 **
## `Hispanic-Owned Firms` 2.349 0.018903 *
## `Manufacturers Shipments - 2007` 2.286 0.022331 *
## Women 2.004 0.045186 *
## `Black-Owned Firms` -1.947 0.051589 .
## Income -2.118 0.034252 *
## `Retail Sales - 2007` -1.726 0.084409 .
## Jewish 1.707 0.087898 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3768 on 3072 degrees of freedom
## Multiple R-squared: 0.9914, Adjusted R-squared: 0.9913
## F-statistic: 9828 on 36 and 3072 DF, p-value: < 2.2e-16
# Roll the county-level vote-count predictions up to state totals, call a
# winner per state, and map the winners.
#
# Resulting column order (relied on by later positional indexing):
#   1-3 from `states`, 4 gop_votes, 5 dem_votes, 6 value (winner), 7 gop_margin.
# NOTE(review): `states` is built elsewhere; this code assumes its 10th column
# holds the lower-case state name choroplethr wants as "region" and its 11th
# the state abbreviation -- confirm.
state_predict <- data.frame(states[, c(1, 10, 11)])

# Positional columns of `votes`: 12 is state_abbr; 110 feeds the Republican
# total and 109 the Democratic total.
# NOTE(review): 109/110 are presumably Clinton_Votes_Predict and
# Trump_Votes_Predict -- confirm, then switch to column names.
has_both_preds <- !is.na(votes[, 109]) & !is.na(votes[, 110])
county_state <- as.character(votes[has_both_preds, 12])

# One grouped sum per party replaces a county x state double loop; counties
# lacking either prediction are excluded from both totals.
gop_by_state <- tapply(votes[has_both_preds, 110], county_state, sum)
dem_by_state <- tapply(votes[has_both_preds, 109], county_state, sum)

state_row <- match(as.character(state_predict[, 3]), names(gop_by_state))
state_predict$gop_votes <- as.vector(gop_by_state)[state_row]
state_predict$dem_votes <- as.vector(dem_by_state)[state_row]
# States with no usable county get a total of 0 rather than NA.
state_predict$gop_votes[is.na(state_predict$gop_votes)] <- 0
state_predict$dem_votes[is.na(state_predict$dem_votes)] <- 0

# Equal totals (including 0 vs 0) are reported as a tie.
state_predict$winner <- rep("TIE", nrow(state_predict))
state_predict$winner[state_predict$gop_votes > state_predict$dem_votes] <- "TRUMP"
state_predict$winner[state_predict$gop_votes < state_predict$dem_votes] <- "CLINTON"

colnames(state_predict)[2] <- "region"
colnames(state_predict)[6] <- "value"
# Republican margin in percentage points of the two-party predicted vote.
state_predict$gop_margin <-
  ((state_predict$gop_votes - state_predict$dem_votes) /
     (state_predict$gop_votes + state_predict$dem_votes)) * 100

c <- StateChoropleth$new(state_predict)
c$title <- "2016 Winner Predicted by Model"
c$add_state_outline <- TRUE
c$legend <- "Winner"
c$set_num_colors(3)
# Colours are named by outcome. An unnamed vector is matched to the values in
# alphabetical order (CLINTON, TIE, TRUMP), which would paint a tie red and
# Trump white as soon as any state is tied.
c$ggplot_scale <- scale_fill_manual(
  values = c(CLINTON = "blue", TRUMP = "red", TIE = "white")
)
state_predict2 <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
# Draw the state map of model-predicted winners.
state_predict2
# State map of the model-predicted Republican margin, binned into seven
# bands. Columns 2 and 7 of `state_predict` are the state name ("region")
# and gop_margin.
state_margin <- state_predict[, c(2, 7)]
colnames(state_margin) <- c("region", "value")
state_margin$value <- cut(
  state_margin$value,
  breaks = c(-100, -10, -5, -1, 1, 5, 10, 100)
)

# Name each colour after its bin. With only one value per state an empty bin
# is quite possible, and an unnamed palette would then be assigned to the
# remaining bins by position, shifting the blue-to-red ramp.
state_margin_colors <- setNames(
  c("darkblue", "dodgerblue", "lightcyan", "white",
    "indianred1", "red", "firebrick4"),
  levels(state_margin$value)
)

c <- StateChoropleth$new(state_margin)
c$title <- "2016 Winner Predicted by Model"
c$add_state_outline <- TRUE
c$legend <- "Model Predicted Win Margin"
c$set_num_colors(7)
c$ggplot_scale <- scale_fill_manual(values = state_margin_colors)
state_predict3 <- c$render() +
  theme(legend.position = "right")
## Warning in self$bind(): The following regions were missing and are being
## set to NA: alaska
# Draw the state map of model-predicted win margins.
state_predict3
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.3
# Correlation plots of candidate vote shares against demographic and
# religious-adherence variables. Rows with a missing value in any selected
# column are dropped before computing correlations (use = "complete.obs").
corr_subset <- subset(
  votes,
  select = c(Trump, Clinton, Romney, Obama, population_change, White, Black,
             Hispanic, Income, Edu_highschool, Edu_batchelors)
)
corr_subset_religion <- subset(
  votes,
  select = c(Trump, Clinton, Romney, Obama, Evangelical, Protestant, Catholic,
             Jewish, Mormon, Christian)
)
correlation <- cor(corr_subset, use = "complete.obs")
correlation_religion <- cor(corr_subset_religion, use = "complete.obs")

# cl.pos = "n" hides the colour legend. `addcolorlabel = "no"` is not a
# corrplot() argument: it is passed on to text() and title(), which warn that
# it "is not a graphical parameter", and the legend is still drawn.
demographics <- corrplot(
  correlation,
  method = "shade", shade.col = NA,
  tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n"
)
# NOTE(review): this assignment replaces the `religion` data frame read from
# religion.csv at the top of the script with corrplot()'s return value; the
# name is kept because it is printed under this name further down.
religion <- corrplot(
  correlation_religion,
  method = "shade", shade.col = NA,
  tl.col = "black", tl.srt = 45,
  addCoef.col = "black", cl.pos = "n"
)
# Print the demographic correlation matrix returned by corrplot().
demographics
## Trump Clinton Romney Obama
## Trump 1.00000000 -0.983706435 0.934274000 -0.93434018
## Clinton -0.98370644 1.000000000 -0.941509781 0.94669692
## Romney 0.93427400 -0.941509781 1.000000000 -0.99813021
## Obama -0.93434018 0.946696924 -0.998130214 1.00000000
## population_change -0.14387402 0.116002093 -0.005088279 0.00512069
## White 0.52966212 -0.593048979 0.478312388 -0.50034927
## Black -0.42517294 0.509268989 -0.390921003 0.41515417
## Hispanic -0.18842304 0.182939797 -0.079660815 0.08122373
## Income -0.23666574 0.197489495 -0.129504155 0.12250369
## Edu_highschool -0.09011927 0.007043676 -0.049315225 0.03366712
## Edu_batchelors -0.48724875 0.434224236 -0.300027325 0.29265640
## population_change White Black Hispanic
## Trump -0.143874022 0.52966212 -0.42517294 -0.18842304
## Clinton 0.116002093 -0.59304898 0.50926899 0.18293980
## Romney -0.005088279 0.47831239 -0.39092100 -0.07966081
## Obama 0.005120690 -0.50034927 0.41515417 0.08122373
## population_change 1.000000000 -0.01033989 -0.09887615 0.17060996
## White -0.010339887 1.00000000 -0.87293276 0.04397709
## Black -0.098876154 -0.87293276 1.00000000 -0.09528965
## Hispanic 0.170609962 0.04397709 -0.09528965 1.00000000
## Income 0.383692512 0.20182088 -0.23822097 -0.03902452
## Edu_highschool 0.206248751 0.29455104 -0.35619057 -0.38413600
## Edu_batchelors 0.435806228 -0.00106647 -0.08473178 0.01336191
## Income Edu_highschool Edu_batchelors
## Trump -0.23666574 -0.090119273 -0.48724875
## Clinton 0.19748949 0.007043676 0.43422424
## Romney -0.12950415 -0.049315225 -0.30002733
## Obama 0.12250369 0.033667123 0.29265640
## population_change 0.38369251 0.206248751 0.43580623
## White 0.20182088 0.294551044 -0.00106647
## Black -0.23822097 -0.356190573 -0.08473178
## Hispanic -0.03902452 -0.384136004 0.01336191
## Income 1.00000000 0.642988316 0.78062981
## Edu_highschool 0.64298832 1.000000000 0.60138653
## Edu_batchelors 0.78062981 0.601386528 1.00000000
# Print the religion correlation matrix returned by corrplot().
religion
## Trump Clinton Romney Obama Evangelical
## Trump 1.00000000 -0.98369258 0.93423719 -0.93432101 0.23433745
## Clinton -0.98369258 1.00000000 -0.94144501 0.94664018 -0.14437003
## Romney 0.93423719 -0.94144501 1.00000000 -0.99814090 0.21845482
## Obama -0.93432101 0.94664018 -0.99814090 1.00000000 -0.19535489
## Evangelical 0.23433745 -0.14437003 0.21845482 -0.19535489 1.00000000
## Protestant 0.13208925 -0.14555522 0.04995778 -0.05100575 -0.04920175
## Catholic -0.15322308 0.13026914 -0.17466183 0.17006743 -0.37589364
## Jewish -0.33262451 0.33792807 -0.27718221 0.27882978 -0.17206418
## Mormon 0.01461194 -0.09757169 0.12890155 -0.13630309 -0.15133860
## Christian 0.14550766 -0.08575101 0.08054665 -0.06302473 0.62757409
## Protestant Catholic Jewish Mormon Christian
## Trump 0.13208925 -0.15322308 -0.33262451 0.01461194 0.14550766
## Clinton -0.14555522 0.13026914 0.33792807 -0.09757169 -0.08575101
## Romney 0.04995778 -0.17466183 -0.27718221 0.12890155 0.08054665
## Obama -0.05100575 0.17006743 0.27882978 -0.13630309 -0.06302473
## Evangelical -0.04920175 -0.37589364 -0.17206418 -0.15133860 0.62757409
## Protestant 1.00000000 0.22831091 -0.06893755 -0.11001702 0.60529619
## Catholic 0.22831091 1.00000000 0.13279224 -0.06883001 0.29035959
## Jewish -0.06893755 0.13279224 1.00000000 -0.02128366 -0.10046100
## Mormon -0.11001702 -0.06883001 -0.02128366 1.00000000 -0.21864417
## Christian 0.60529619 0.29035959 -0.10046100 -0.21864417 1.00000000
library(h2o)
## Warning: package 'h2o' was built under R version 3.3.3
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following object is masked from 'package:acs':
##
## apply
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Connect to a local H2O cluster, starting one if none is already running.
# nthreads = -1 asks H2O to use every available core; max_mem_size is the
# memory requested when a new cluster is started.
h2o.init(nthreads=-1,max_mem_size='6G')
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 1 hours 10 minutes
## H2O cluster version: 3.10.4.6
## H2O cluster version age: 2 months and 7 days
## H2O cluster name: H2O_started_from_R_onest_xjv369
## H2O cluster total nodes: 1
## H2O cluster total memory: 4.95 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 3.3.2 (2016-10-31)
# Upload the Clinton modelling data frame to the H2O cluster.
# NOTE(review): Predict_Clinton is built elsewhere in the file; the model
# code that follows treats its first column as the response -- confirm.
predict.Clinton = as.h2o(Predict_Clinton)
##
|
| | 0%
|
|=================================================================| 100%
# Random forest on the Clinton frame, used for its variable-importance
# ranking: column 1 is the response and columns 2-59 are the predictors.
# Up to 200 trees with 10-fold cross-validation; the model is scored after
# every tree and early stopping is enabled with stopping_rounds = 2.
vars.Clinton <- colnames(predict.Clinton)
y_var.Clinton <- vars.Clinton[1]
x_vars.Clinton <- vars.Clinton[2:59]
Clinton_features <- h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = predict.Clinton,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
##
|
| | 0%
|
|==== | 6%
|
|================================================= | 75%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|=================================================================| 100%
# Model size, out-of-bag and cross-validation metrics, scoring history and
# variable importances for the Clinton random forest.
summary(Clinton_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_13
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 21 21 506920 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1861 1975 1914.47620
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 6.967564
## RMSE: 2.639614
## MAE: 1.987007
## RMSLE: 0.09819153
## Mean Residual Deviance : 6.967564
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 6.039764
## RMSE: 2.457593
## MAE: 1.84011
## RMSLE: 0.09185895
## Mean Residual Deviance : 6.039764
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid
## mae 1.8428727 0.07523045 1.8915086 1.7472016
## mse 6.0550523 0.55579835 6.005443 5.3264985
## r2 0.9741264 0.002257016 0.97279286 0.9773105
## residual_deviance 6.0550523 0.55579835 6.005443 5.3264985
## rmse 2.4556847 0.11105085 2.4506006 2.3079207
## rmsle 0.091426335 0.0060151466 0.093521066 0.091158554
## cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae 1.8171152 1.8222564 1.7392036 1.6625148 1.9629086
## mse 5.776114 5.5907035 5.639608 5.032604 6.8387527
## r2 0.9789275 0.97289354 0.97356176 0.97540796 0.970742
## residual_deviance 5.776114 5.5907035 5.639608 5.032604 6.8387527
## rmse 2.4033546 2.364467 2.374786 2.2433467 2.6151009
## rmsle 0.08283377 0.07795417 0.08026096 0.0945025 0.09372223
## cv_8_valid cv_9_valid cv_10_valid
## mae 1.8111181 1.980861 1.9940398
## mse 5.645194 7.41484 7.2807646
## r2 0.9784077 0.9684981 0.9727218
## residual_deviance 5.645194 7.41484 7.2807646
## rmse 2.3759618 2.7230203 2.6982892
## rmsle 0.10725975 0.09982335 0.09322696
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:50:18 14.166 sec 0
## 2 2017-07-04 13:50:18 14.258 sec 1 4.56398
## 3 2017-07-04 13:50:18 14.356 sec 2 4.18082
## 4 2017-07-04 13:50:19 14.449 sec 3 3.94459
## 5 2017-07-04 13:50:19 14.542 sec 4 3.79623
## training_mae training_deviance
## 1
## 2 3.24738 20.82995
## 3 2.99978 17.47927
## 4 2.87190 15.55976
## 5 2.79030 14.41133
##
## ---
## timestamp duration number_of_trees training_rmse
## 17 2017-07-04 13:50:20 15.567 sec 16 2.74439
## 18 2017-07-04 13:50:20 15.646 sec 17 2.71752
## 19 2017-07-04 13:50:20 15.728 sec 18 2.69444
## 20 2017-07-04 13:50:20 15.807 sec 19 2.65668
## 21 2017-07-04 13:50:20 15.885 sec 20 2.64683
## 22 2017-07-04 13:50:20 15.960 sec 21 2.63961
## training_mae training_deviance
## 17 2.06025 7.53168
## 18 2.04409 7.38492
## 19 2.03443 7.26000
## 20 2.00742 7.05795
## 21 1.99804 7.00569
## 22 1.98701 6.96756
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Obama 4839851.500000
## 2 Romney 3895734.000000
## 3 Accommodation and Food Service Sales - 2007 643884.750000
## 4 Housing Units in Multi-Unit Structures 403044.406250
## 5 Black 374463.468750
## scaled_importance percentage
## 1 1.000000 0.399807
## 2 0.804928 0.321816
## 3 0.133038 0.053190
## 4 0.083276 0.033294
## 5 0.077371 0.030933
##
## ---
## variable relative_importance scaled_importance
## 53 Mormon 4044.271240 0.000836
## 54 Manufacturers Shipments - 2007 2623.896729 0.000542
## 55 Orthodox 2301.390381 0.000476
## 56 Islamic 1621.955444 0.000335
## 57 Buddhist 752.422119 0.000155
## 58 Hindu 564.204590 0.000117
## percentage
## 53 0.000334
## 54 0.000217
## 55 0.000190
## 56 0.000134
## 57 0.000062
## 58 0.000047
# First 20 rows of the Clinton model's variable-importance table.
h2o.varimp(Clinton_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Obama 4839851.500000
## 2 Romney 3895734.000000
## 3 Accommodation and Food Service Sales - 2007 643884.750000
## 4 Housing Units in Multi-Unit Structures 403044.406250
## 5 Black 374463.468750
## 6 White 316123.406250
## 7 Private Nonfarm Employment 311307.937500
## 8 Black-Owned Firms 212851.578125
## 9 Density 189867.125000
## 10 Foreign Born 97862.742188
## 11 Edu_batchelors 89795.242188
## 12 Median Value of Owner-Occupied Housing Units 61838.433594
## 13 Historically_Black 60181.757812
## 14 NonEnglish 46723.226562
## 15 Total Number of Firms 43642.250000
## 16 Homeownership Rate 33886.480469
## 17 population2010 30759.291016
## 18 Households 22673.718750
## 19 Persons/Household 22604.105469
## 20 population_change 21853.570312
## scaled_importance percentage
## 1 1.000000 0.399807
## 2 0.804928 0.321816
## 3 0.133038 0.053190
## 4 0.083276 0.033294
## 5 0.077371 0.030933
## 6 0.065317 0.026114
## 7 0.064322 0.025716
## 8 0.043979 0.017583
## 9 0.039230 0.015684
## 10 0.020220 0.008084
## 11 0.018553 0.007418
## 12 0.012777 0.005108
## 13 0.012435 0.004971
## 14 0.009654 0.003860
## 15 0.009017 0.003605
## 16 0.007002 0.002799
## 17 0.006355 0.002541
## 18 0.004685 0.001873
## 19 0.004670 0.001867
## 20 0.004515 0.001805
# Variable-importance plot for the Clinton model, limited to 20 features.
h2o.varimp_plot(Clinton_features, num_of_features = 20)
# Upload the Trump modelling data frame to the H2O cluster.
# NOTE(review): Predict_Trump is built elsewhere in the file; the model code
# that follows treats its first column as the response -- confirm.
predict.Trump = as.h2o(Predict_Trump)
##
|
| | 0%
|
|=================================================================| 100%
# Random forest on the Trump frame, configured like the Clinton model:
# response in column 1, predictors in columns 2-59, up to 200 trees, 10-fold
# cross-validation, scoring after every tree, stopping_rounds = 2.
vars.Trump <- colnames(predict.Trump)
y_var.Trump <- vars.Trump[1]
x_vars.Trump <- vars.Trump[2:59]
Trump_features <- h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = predict.Trump,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
##
|
| | 0%
|
|= | 1%
|
|================== | 28%
|
|========================================== | 64%
|
|================================================ | 74%
|
|====================================================== | 83%
|
|=========================================================== | 91%
|
|=================================================================| 100%
# Model size, out-of-bag and cross-validation metrics, scoring history and
# variable importances for the Trump random forest.
summary(Trump_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_14
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 18 18 432638 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1844 1938 1906.88890
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 9.19168
## RMSE: 3.031778
## MAE: 2.273826
## RMSLE: 0.0644703
## Mean Residual Deviance : 9.19168
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 8.099768
## RMSE: 2.846009
## MAE: 2.0923
## RMSLE: 0.06425957
## Mean Residual Deviance : 8.099768
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid
## mae 2.0925953 0.08323233 2.084904 1.9466763
## mse 8.099447 0.7257394 7.811906 6.649725
## r2 0.96660876 0.0033785722 0.966138 0.9721209
## residual_deviance 8.099447 0.7257394 7.811906 6.649725
## rmse 2.8401918 0.12798046 2.7949786 2.578706
## rmsle 0.06314858 0.009248693 0.055708043 0.050057102
## cv_3_valid cv_4_valid cv_5_valid cv_6_valid
## mae 1.9816947 1.9424402 1.9840581 2.267307
## mse 7.452567 6.5643163 7.939525 9.761651
## r2 0.97353655 0.969534 0.96519685 0.95566523
## residual_deviance 7.452567 6.5643163 7.939525 9.761651
## rmse 2.729939 2.562092 2.8177164 3.1243641
## rmsle 0.055192083 0.05458503 0.07344787 0.060759306
## cv_7_valid cv_8_valid cv_9_valid cv_10_valid
## mae 2.107805 2.161643 2.21629 2.2331357
## mse 7.728803 9.073089 8.840077 9.172812
## r2 0.9676459 0.9672588 0.9623796 0.96661174
## residual_deviance 7.728803 9.073089 8.840077 9.172812
## rmse 2.7800725 3.0121567 2.9732268 3.028665
## rmsle 0.061720204 0.058766253 0.063254 0.09799592
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:50:34 12.464 sec 0
## 2 2017-07-04 13:50:34 12.543 sec 1 4.68151
## 3 2017-07-04 13:50:35 12.626 sec 2 4.46339
## 4 2017-07-04 13:50:35 12.702 sec 3 4.18316
## 5 2017-07-04 13:50:35 12.776 sec 4 4.16084
## 6 2017-07-04 13:50:35 12.854 sec 5 3.93286
## 7 2017-07-04 13:50:35 12.933 sec 6 3.75325
## 8 2017-07-04 13:50:35 13.008 sec 7 3.64086
## 9 2017-07-04 13:50:35 13.084 sec 8 3.58072
## 10 2017-07-04 13:50:35 13.162 sec 9 3.52243
## 11 2017-07-04 13:50:35 13.236 sec 10 3.44681
## 12 2017-07-04 13:50:35 13.315 sec 11 3.37018
## 13 2017-07-04 13:50:35 13.397 sec 12 3.27858
## 14 2017-07-04 13:50:35 13.476 sec 13 3.22463
## 15 2017-07-04 13:50:35 13.554 sec 14 3.17985
## 16 2017-07-04 13:50:36 13.629 sec 15 3.13531
## 17 2017-07-04 13:50:36 13.705 sec 16 3.09054
## 18 2017-07-04 13:50:36 13.784 sec 17 3.05684
## 19 2017-07-04 13:50:36 13.861 sec 18 3.03178
## training_mae training_deviance
## 1
## 2 3.50252 21.91649
## 3 3.33913 19.92182
## 4 3.13500 17.49880
## 5 3.05750 17.31255
## 6 2.91188 15.46737
## 7 2.78487 14.08689
## 8 2.70936 13.25584
## 9 2.66056 12.82155
## 10 2.61323 12.40750
## 11 2.56129 11.88051
## 12 2.51101 11.35811
## 13 2.44729 10.74907
## 14 2.40585 10.39823
## 15 2.35768 10.11145
## 16 2.34510 9.83017
## 17 2.31388 9.55142
## 18 2.29450 9.34427
## 19 2.27383 9.19168
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Romney 5155287.000000
## 2 Obama 2963044.500000
## 3 Housing Units in Multi-Unit Structures 528261.750000
## 4 Accommodation and Food Service Sales - 2007 367348.062500
## 5 Edu_batchelors 318496.687500
## scaled_importance percentage
## 1 1.000000 0.471791
## 2 0.574758 0.271166
## 3 0.102470 0.048344
## 4 0.071257 0.033618
## 5 0.061781 0.029148
##
## ---
## variable relative_importance scaled_importance
## 53 Manufacturers Shipments - 2007 4126.576660 0.000800
## 54 Orthodox 1809.172729 0.000351
## 55 Jewish 1600.463379 0.000310
## 56 Islamic 1317.801880 0.000256
## 57 Buddhist 751.266907 0.000146
## 58 Hindu 88.646675 0.000017
## percentage
## 53 0.000378
## 54 0.000166
## 55 0.000146
## 56 0.000121
## 57 0.000069
## 58 0.000008
# First 20 rows of the Trump model's variable-importance table.
h2o.varimp(Trump_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Romney 5155287.000000
## 2 Obama 2963044.500000
## 3 Housing Units in Multi-Unit Structures 528261.750000
## 4 Accommodation and Food Service Sales - 2007 367348.062500
## 5 Edu_batchelors 318496.687500
## 6 Black 273496.593750
## 7 Private Nonfarm Establishments 2013 187035.359375
## 8 Private Nonfarm Employment 159617.578125
## 9 Median Value of Owner-Occupied Housing Units 93521.976562
## 10 White 83486.484375
## 11 Foreign Born 82496.109375
## 12 Black-Owned Firms 54077.496094
## 13 Evangelical 53287.820312
## 14 NonEnglish 47489.898438
## 15 Historically_Black 36577.964844
## 16 population2010 29904.724609
## 17 population_change 28675.873047
## 18 Persons/Household 25468.931641
## 19 Hispanic 25454.554688
## 20 Housing Units 2014 24591.384766
## scaled_importance percentage
## 1 1.000000 0.471791
## 2 0.574758 0.271166
## 3 0.102470 0.048344
## 4 0.071257 0.033618
## 5 0.061781 0.029148
## 6 0.053052 0.025029
## 7 0.036280 0.017117
## 8 0.030962 0.014608
## 9 0.018141 0.008559
## 10 0.016194 0.007640
## 11 0.016002 0.007550
## 12 0.010490 0.004949
## 13 0.010337 0.004877
## 14 0.009212 0.004346
## 15 0.007095 0.003347
## 16 0.005801 0.002737
## 17 0.005562 0.002624
## 18 0.004940 0.002331
## 19 0.004938 0.002329
## 20 0.004770 0.002251
# Variable-importance plot for the Trump model, limited to 20 features.
h2o.varimp_plot(Trump_features, num_of_features = 20)
# Upload the swing (overall deviation) modelling data frame to the H2O cluster.
# NOTE(review): Overall_Dev_Predict is built elsewhere in the file; the model
# code that follows treats column 61 as the response -- confirm.
predict.Swing = as.h2o(Overall_Dev_Predict)
##
|
| | 0%
|
|=================================================================| 100%
# Random forest on the swing frame: column 61 is the response, and columns
# 1-60 together with column 62 are the predictors. Same training settings as
# the candidate models: up to 200 trees, 10-fold cross-validation, scoring
# after every tree, stopping_rounds = 2.
vars.Swing <- colnames(predict.Swing)
y_var.Swing <- vars.Swing[61]
x_vars.Swing <- vars.Swing[c(1:60, 62)]
Swing_features <- h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = predict.Swing,
  ntrees = 200,
  nfolds = 10,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  seed = 123
)
##
|
| | 0%
|
|= | 1%
|
|==================== | 30%
|
|=========================================== | 65%
|
|================================================ | 74%
|
|=========================================================== | 91%
|
|=================================================================| 100%
# Model size, out-of-bag and cross-validation metrics, scoring history and
# variable importances for the swing random forest.
summary(Swing_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_15
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 20 20 482482 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1866 1988 1912.80000
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 23.38624
## RMSE: 4.835932
## MAE: 3.640499
## RMSLE: NaN
## Mean Residual Deviance : 23.38624
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 20.82928
## RMSE: 4.563911
## MAE: 3.454607
## RMSLE: NaN
## Mean Residual Deviance : 20.82928
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 3.456276 0.11210502 3.5003939 3.2188752 3.1686027
## mse 20.833815 1.5101509 21.30881 18.320068 17.43827
## r2 0.7998607 0.015021347 0.7985061 0.79740304 0.8198827
## residual_deviance 20.833815 1.5101509 21.30881 18.320068 17.43827
## rmse 4.55834 0.16635615 4.6161466 4.2801948 4.1759157
## rmsle 0.0 NaN NaN NaN NaN
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 3.3518422 3.576076 3.601392 3.483406 3.4794314
## mse 18.83481 22.753336 23.270376 21.153341 22.000587
## r2 0.8184314 0.77076167 0.7945231 0.77902 0.8145726
## residual_deviance 18.83481 22.753336 23.270376 21.153341 22.000587
## rmse 4.339909 4.7700458 4.823938 4.599276 4.6904783
## rmsle NaN NaN NaN NaN NaN
## cv_9_valid cv_10_valid
## mae 3.7058234 3.4769158
## mse 23.994698 19.263847
## r2 0.76946336 0.83604324
## residual_deviance 23.994698 19.263847
## rmse 4.8984385 4.38906
## rmsle NaN NaN
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:50:52 14.316 sec 0
## 2 2017-07-04 13:50:52 14.401 sec 1 6.80282
## 3 2017-07-04 13:50:52 14.485 sec 2 6.73403
## 4 2017-07-04 13:50:52 14.571 sec 3 6.47733
## 5 2017-07-04 13:50:52 14.654 sec 4 6.35175
## training_mae training_deviance
## 1
## 2 5.16590 46.27842
## 3 5.04244 45.34712
## 4 4.85260 41.95584
## 5 4.70634 40.34476
##
## ---
## timestamp duration number_of_trees training_rmse
## 16 2017-07-04 13:50:53 15.565 sec 15 4.98087
## 17 2017-07-04 13:50:53 15.648 sec 16 4.95881
## 18 2017-07-04 13:50:53 15.728 sec 17 4.93672
## 19 2017-07-04 13:50:53 15.816 sec 18 4.89476
## 20 2017-07-04 13:50:53 15.896 sec 19 4.86762
## 21 2017-07-04 13:50:54 15.978 sec 20 4.83593
## training_mae training_deviance
## 16 3.74797 24.80909
## 17 3.73866 24.58977
## 18 3.71992 24.37123
## 19 3.69023 23.95865
## 20 3.66254 23.69370
## 21 3.64050 23.38624
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance scaled_importance percentage
## 1 Foreign Born 780725.812500 1.000000 0.168096
## 2 Edu_batchelors 767971.062500 0.983663 0.165350
## 3 Romney 293125.968750 0.375453 0.063112
## 4 Obama 287345.687500 0.368049 0.061868
## 5 population_change 264071.625000 0.338239 0.056857
##
## ---
## variable relative_importance scaled_importance percentage
## 56 population2010 6572.494629 0.008418 0.001415
## 57 Orthodox 5954.307129 0.007627 0.001282
## 58 Jewish 3808.336426 0.004878 0.000820
## 59 Buddhist 3445.218506 0.004413 0.000742
## 60 Islamic 2736.301758 0.003505 0.000589
## 61 Hindu 1201.396729 0.001539 0.000259
# Show the first 20 rows of the variable-importance table of the
# Swing_features model (the table is ordered by relative importance).
h2o.varimp(Swing_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Foreign Born 780725.812500
## 2 Edu_batchelors 767971.062500
## 3 Romney 293125.968750
## 4 Obama 287345.687500
## 5 population_change 264071.625000
## 6 age65plus 191607.312500
## 7 Building Permits 176082.515625
## 8 Black 170653.609375
## 9 White 143307.312500
## 10 NonEnglish 135277.156250
## 11 Persons/Household 91002.992188
## 12 Median Value of Owner-Occupied Housing Units 88411.570312
## 13 Hispanic 88308.492188
## 14 Mormon 74074.476562
## 15 Evangelical 68271.750000
## 16 Christian 66716.500000
## 17 Historically_Black 65353.214844
## 18 Protestant 53810.617188
## 19 Persons Under 18 49755.976562
## 20 Income 45096.437500
## scaled_importance percentage
## 1 1.000000 0.168096
## 2 0.983663 0.165350
## 3 0.375453 0.063112
## 4 0.368049 0.061868
## 5 0.338239 0.056857
## 6 0.245422 0.041254
## 7 0.225537 0.037912
## 8 0.218583 0.036743
## 9 0.183557 0.030855
## 10 0.173271 0.029126
## 11 0.116562 0.019594
## 12 0.113243 0.019036
## 13 0.113111 0.019013
## 14 0.094879 0.015949
## 15 0.087447 0.014699
## 16 0.085454 0.014365
## 17 0.083708 0.014071
## 18 0.068924 0.011586
## 19 0.063730 0.010713
## 20 0.057762 0.009710
# Plot the variable importances of the Swing_features model, limited to 20 features.
h2o.varimp_plot(Swing_features, num_of_features = 20)
# Correlation matrix of the two 2016 vote shares and the eleven variables that
# top the variable-importance table printed above.
# Columns selected, in order: Clinton, Trump, Foreign Born, Edu_batchelors,
# Romney, Obama, population_change, age65plus, Building Permits, Black, White,
# NonEnglish, Persons/Household.
corr_subset_swing = votes[,c(8,9,43,46,20,19,28,32,74,35,34,44,54)]
# "complete.obs" drops every row that has an NA in any of the selected columns.
correlation_swing = cor(corr_subset_swing,use = "complete.obs")
# cl.pos="n" suppresses the colour legend. The former addcolorlabel="no" is not
# a corrplot() argument: it fell through `...` to text()/title() and only
# produced '"addcolorlabel" is not a graphical parameter' warnings.
swing = corrplot(correlation_swing, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n")
# Print the value returned by corrplot().
swing
## Clinton Trump Foreign Born Edu_batchelors
## Clinton 1.0000000 -0.9837064 0.391563486 0.43422424
## Trump -0.9837064 1.0000000 -0.395191097 -0.48724875
## Foreign Born 0.3915635 -0.3951911 1.000000000 0.36658290
## Edu_batchelors 0.4342242 -0.4872487 0.366582898 1.00000000
## Romney -0.9415098 0.9342740 -0.242001812 -0.30002733
## Obama 0.9466969 -0.9343402 0.244280092 0.29265640
## population_change 0.1160021 -0.1438740 0.316927722 0.43580623
## age65plus -0.3078179 0.3230607 -0.344869443 -0.23498620
## Building Permits 0.2819284 -0.2868319 0.420037480 0.34895577
## Black 0.5092690 -0.4251729 0.009439359 -0.08473178
## White -0.5930490 0.5296621 -0.111214215 -0.00106647
## NonEnglish 0.3203200 -0.3264593 0.822902882 0.15263309
## Persons/Household 0.1655445 -0.1705746 0.411876494 -0.06228232
## Romney Obama population_change age65plus
## Clinton -0.941509781 0.94669692 0.116002093 -0.3078179
## Trump 0.934274000 -0.93434018 -0.143874022 0.3230607
## Foreign Born -0.242001812 0.24428009 0.316927722 -0.3448694
## Edu_batchelors -0.300027325 0.29265640 0.435806228 -0.2349862
## Romney 1.000000000 -0.99813021 -0.005088279 0.2017157
## Obama -0.998130214 1.00000000 0.005120690 -0.2084752
## population_change -0.005088279 0.00512069 1.000000000 -0.4145400
## age65plus 0.201715712 -0.20847523 -0.414539997 1.0000000
## Building Permits -0.186745154 0.18824115 0.334357078 -0.2307868
## Black -0.390921003 0.41515417 -0.098876154 -0.2256728
## White 0.478312388 -0.50034927 -0.010339887 0.3126738
## NonEnglish -0.205383341 0.20735084 0.238685010 -0.3026660
## Persons/Household -0.049336849 0.06191844 0.275078327 -0.6041703
## Building Permits Black White NonEnglish
## Clinton 0.28192841 0.509268989 -0.59304898 0.32031996
## Trump -0.28683186 -0.425172945 0.52966212 -0.32645928
## Foreign Born 0.42003748 0.009439359 -0.11121421 0.82290288
## Edu_batchelors 0.34895577 -0.084731778 -0.00106647 0.15263309
## Romney -0.18674515 -0.390921003 0.47831239 -0.20538334
## Obama 0.18824115 0.415154166 -0.50034927 0.20735084
## population_change 0.33435708 -0.098876154 -0.01033989 0.23868501
## age65plus -0.23078680 -0.225672806 0.31267378 -0.30266597
## Building Permits 1.00000000 0.066066494 -0.12919138 0.28003297
## Black 0.06606649 1.000000000 -0.87293276 -0.07267724
## White -0.12919138 -0.872932756 1.00000000 -0.04805566
## NonEnglish 0.28003297 -0.072677242 -0.04805566 1.00000000
## Persons/Household 0.14228533 0.152645633 -0.30238451 0.50826960
## Persons/Household
## Clinton 0.16554447
## Trump -0.17057457
## Foreign Born 0.41187649
## Edu_batchelors -0.06228232
## Romney -0.04933685
## Obama 0.06191844
## population_change 0.27507833
## age65plus -0.60417034
## Building Permits 0.14228533
## Black 0.15264563
## White -0.30238451
## NonEnglish 0.50826960
## Persons/Household 1.00000000
# Adjusted R-squared of nested linear models, adding one predictor per step,
# for three responses: Clinton share, Trump share and per_shift.
# Predictors are addressed by column position in `votes`. Names recoverable
# from the printed outputs elsewhere in this file:
#   20 Romney, 28 population_change, 32 age65plus, 34 White, 35 Black,
#   43 Foreign Born, 46 Edu_batchelors, 51 Housing Units in Multi-Unit Structures
#   73: presumably "Accommodation and Food Service Sales - 2007" -- TODO confirm
# Each *_rsq vector starts with 0 so that index k corresponds to k predictors.
clinton_1 = summary(lm(Clinton ~ Obama, data=votes))$adj.r.squared
clinton_2 = summary(lm(Clinton ~ Obama + votes[,73],data=votes))$adj.r.squared
clinton_3 = summary(lm(Clinton ~ Obama + votes[,73] + votes[,51],data=votes))$adj.r.squared
clinton_4 = summary(lm(Clinton ~ Obama + votes[,73] + votes[,51] + votes[,35],data=votes))$adj.r.squared
clinton_5 = summary(lm(Clinton ~ Obama + votes[,73] + votes[,51] + votes[,35] + votes[,34],data=votes))$adj.r.squared
Clinton_rsq = c(0,clinton_1,clinton_2,clinton_3,clinton_4,clinton_5)
trump_1 = summary(lm(Trump ~ Romney, data=votes))$adj.r.squared
trump_2 = summary(lm(Trump ~ Romney + votes[,51], data=votes))$adj.r.squared
trump_3 = summary(lm(Trump ~ Romney + votes[,51] + votes[,73], data=votes))$adj.r.squared
trump_4 = summary(lm(Trump ~ Romney + votes[,51] + votes[,73] + votes[,46], data=votes))$adj.r.squared
trump_5 = summary(lm(Trump ~ Romney + votes[,51] + votes[,73] + votes[,46] + votes[,35], data=votes))$adj.r.squared
Trump_rsq = c(0,trump_1,trump_2,trump_3,trump_4,trump_5)
shift_1 = summary(lm(per_shift ~ votes[,43],data=votes))$adj.r.squared
shift_2 = summary(lm(per_shift ~ votes[,43] + votes[,46],data=votes))$adj.r.squared
shift_3 = summary(lm(per_shift ~ votes[,43] + votes[,46] + votes[,20],data=votes))$adj.r.squared
shift_4 = summary(lm(per_shift ~ votes[,43] + votes[,46] + votes[,20]+ votes[,28],data=votes))$adj.r.squared
shift_5 = summary(lm(per_shift ~ votes[,43] + votes[,46] + votes[,20]+ votes[,28] + votes[,32],data=votes))$adj.r.squared
theshift = c(0,shift_1,shift_2,shift_3,shift_4,shift_5)
num = c(0,1,2,3,4,5)
theMax = max(Clinton_rsq,Trump_rsq,theshift)
# Because every vector starts with 0, theMin is 0 unless some adjusted
# R-squared is negative; using it as the lower limit keeps such a point visible.
theMin = min(Clinton_rsq,Trump_rsq,theshift)
# Draw the axes, title and Clinton curve once, then add the other two curves
# with lines() instead of stacking complete plots via par(new=T).
plot(num,
     Clinton_rsq,
     col = "blue",
     type = "b",
     main = "Accuracy for Features Selected",
     xlab = "Number of features",
     ylab = "Accuracy",
     ylim = c(theMin, theMax))
lines(num, Trump_rsq, col = "red", type = "b")
lines(num, theshift, type = "b")
legend("bottomright",legend=c("Clinton","Trump","Shift"),fill=c('blue','red','black'))
# Report the adjusted R-squared of the two single-predictor models
# (Clinton ~ Obama and Trump ~ Romney); the label calls it "Variation".
cat("Clinton - Obama Variation:", clinton_1,"\n")
## Clinton - Obama Variation: 0.8962017
cat("Trump - Romney Variation:", trump_1)
## Trump - Romney Variation: 0.872827
# Linear model of per_shift on every other column of the subset.
# Column names are taken from the coefficient table printed below.
# NOTE(review): Swing_features is also the object passed to h2o.varimp()
# earlier in the file; this assignment replaces it with an lm fit.
theSubset = votes[, c(
  19, 20,          # Obama, Romney
  28, 31, 32,      # population_change, Persons Under 18, age65plus
  34, 35, 40,      # White, Black, Hispanic
  43, 44, 46,      # Foreign Born, NonEnglish, Edu_batchelors
  52, 54, 55, 74,  # Median Value of Owner-Occupied Housing Units,
                   # Persons/Household, Income, Building Permits
  84, 85, 86, 89,  # Evangelical, Protestant, Historically_Black, Mormon
  98,              # per_shift (the response)
  100              # Christian
)]
Swing_features = lm(per_shift ~ ., data = theSubset)
summary(Swing_features)
##
## Call:
## lm(formula = per_shift ~ ., data = theSubset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.398 -2.853 -0.068 2.912 21.873
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 3.952e+01 1.107e+01
## Obama 1.416e-01 1.134e-01
## Romney -2.469e-01 1.130e-01
## population_change -2.423e-02 2.646e-02
## `Persons Under 18` -1.791e-01 4.392e-02
## age65plus -1.088e-01 3.165e-02
## White 8.169e-02 1.379e-02
## Black -3.004e-01 1.609e-02
## Hispanic -2.064e-01 1.866e-02
## `Foreign Born` 4.090e-03 3.536e-02
## NonEnglish -6.240e-02 2.846e-02
## Edu_batchelors -8.624e-01 1.924e-02
## `Median Value of Owner-Occupied Housing Units` -2.780e-05 2.282e-06
## `Persons/Household` -1.980e+00 6.542e-01
## Income 4.023e-04 3.404e-05
## `Building Permits` -3.084e-04 7.782e-05
## Evangelical -2.372e-03 1.394e-02
## Protestant 3.383e-02 1.753e-02
## Historically_Black -9.208e-02 6.593e-02
## Mormon -2.943e-01 2.404e-02
## Christian 2.375e-02 1.254e-02
## t value Pr(>|t|)
## (Intercept) 3.571 0.000361 ***
## Obama 1.248 0.212091
## Romney -2.185 0.028976 *
## population_change -0.916 0.359974
## `Persons Under 18` -4.077 4.67e-05 ***
## age65plus -3.438 0.000595 ***
## White 5.923 3.51e-09 ***
## Black -18.663 < 2e-16 ***
## Hispanic -11.058 < 2e-16 ***
## `Foreign Born` 0.116 0.907924
## NonEnglish -2.192 0.028446 *
## Edu_batchelors -44.827 < 2e-16 ***
## `Median Value of Owner-Occupied Housing Units` -12.180 < 2e-16 ***
## `Persons/Household` -3.026 0.002500 **
## Income 11.815 < 2e-16 ***
## `Building Permits` -3.963 7.56e-05 ***
## Evangelical -0.170 0.864903
## Protestant 1.930 0.053674 .
## Historically_Black -1.397 0.162639
## Mormon -12.240 < 2e-16 ***
## Christian 1.894 0.058354 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.821 on 3088 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.78, Adjusted R-squared: 0.7785
## F-statistic: 547.3 on 20 and 3088 DF, p-value: < 2.2e-16
# Reduced linear model of per_shift: fewer predictors plus one product term.
# Columns kept (names as shown in the summary below): population_change,
# age65plus, White, Hispanic, Edu_batchelors,
# Housing Units in Multi-Unit Structures,
# Median Value of Owner-Occupied Housing Units, Income, Building Permits,
# Evangelical, Mormon, per_shift, Christian.
# Rows with an NA in any of these columns are dropped before fitting.
my_subset = na.omit(votes[, c(28, 32, 34, 40, 46, 51, 52, 55, 74, 84, 89, 98, 100)])
# NOTE(review): despite its name, this term multiplies population_change by
# Edu_batchelors, not by Income -- confirm which one was intended.
my_subset[["IncomeXPopChange"]] = my_subset[["population_change"]] * my_subset[["Edu_batchelors"]]
my_model = lm(per_shift ~ ., data = my_subset)
summary(my_model)
##
## Call:
## lm(formula = per_shift ~ ., data = my_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.089 -3.983 -0.181 3.843 42.387
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.342e+01 1.069e+00
## population_change -3.108e-01 7.099e-02
## age65plus 1.292e-01 3.330e-02
## White 1.239e-01 9.031e-03
## Hispanic -2.364e-01 9.046e-03
## Edu_batchelors -7.792e-01 2.704e-02
## `Housing Units in Multi-Unit Structures` 1.402e-01 1.824e-02
## `Median Value of Owner-Occupied Housing Units` -9.084e-06 2.661e-06
## Income 1.465e-04 4.319e-05
## `Building Permits` -4.518e-04 1.035e-04
## Evangelical -2.670e-01 1.306e-02
## Mormon -5.276e-01 3.000e-02
## Christian 9.638e-02 9.980e-03
## IncomeXPopChange 5.898e-03 3.088e-03
## t value Pr(>|t|)
## (Intercept) 12.553 < 2e-16 ***
## population_change -4.378 1.24e-05 ***
## age65plus 3.880 0.000107 ***
## White 13.718 < 2e-16 ***
## Hispanic -26.133 < 2e-16 ***
## Edu_batchelors -28.811 < 2e-16 ***
## `Housing Units in Multi-Unit Structures` 7.686 2.03e-14 ***
## `Median Value of Owner-Occupied Housing Units` -3.414 0.000648 ***
## Income 3.393 0.000700 ***
## `Building Permits` -4.366 1.31e-05 ***
## Evangelical -20.443 < 2e-16 ***
## Mormon -17.584 < 2e-16 ***
## Christian 9.657 < 2e-16 ***
## IncomeXPopChange 1.910 0.056197 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.354 on 3095 degrees of freedom
## Multiple R-squared: 0.6169, Adjusted R-squared: 0.6153
## F-statistic: 383.4 on 13 and 3095 DF, p-value: < 2.2e-16
# Split the rows of `votes` into three groups by state abbreviation.
# Group membership is exactly the hard-coded lists below.
# %in% never yields NA, so rows with a missing state_abbr are simply excluded,
# as they were with which().
# Fix: Alaska was written "Ak", which can never equal the upper-case "AK",
# so its rows were left out of every group.
RedStates = votes[votes$state_abbr %in% c(
  "AL", "AK", "AR", "GA", "ID", "IN", "KS", "KY", "LA", "MS", "MO",
  "MT", "NE", "ND", "OK", "SC", "SD", "TN", "TX", "UT", "WV", "WY"
),]
BlueStates = votes[votes$state_abbr %in% c(
  "CA", "CT", "DE", "HI", "IL", "MA", "MD", "NJ",
  "NM", "NY", "OR", "RI", "VT", "WA", "DC"
),]
SwingStates = votes[votes$state_abbr %in% c(
  "AZ", "CO", "FL", "IA", "ME", "MI", "MN",
  "NV", "NH", "NC", "OH", "PA", "VA", "WI"
),]
# Correlation matrices per state group. The same two column sets are used for
# every group (names as shown in the printed matrices further down):
#   set 1: Clinton, Trump, Obama, Romney, population_change, age65plus, White,
#          Black, Hispanic, Edu_batchelors,
#          Housing Units in Multi-Unit Structures
#   set 2: Clinton, Trump, Median Value of Owner-Occupied Housing Units,
#          Income, Building Permits, Density, Evangelical, Protestant,
#          Catholic, Mormon, Christian
# "complete.obs" drops rows with an NA in any selected column.

# Red group
Red_subset1 = RedStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]
Red_cor1 = cor(Red_subset1, use = "complete.obs")
Red_subset2 = RedStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]
Red_cor2 = cor(Red_subset2, use = "complete.obs")

# Blue group
Blue_subset1 = BlueStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]
Blue_cor1 = cor(Blue_subset1, use = "complete.obs")
Blue_subset2 = BlueStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]
Blue_cor2 = cor(Blue_subset2, use = "complete.obs")

# Swing group
Swing_subset1 = SwingStates[, c(8, 9, 19, 20, 28, 32, 34, 35, 40, 46, 51)]
Swing_cor1 = cor(Swing_subset1, use = "complete.obs")
Swing_subset2 = SwingStates[, c(8, 9, 52, 55, 74, 76, 84, 85, 87, 89, 100)]
Swing_cor2 = cor(Swing_subset2, use = "complete.obs")
# Shaded correlation plots with coefficients, two per state group.
# cl.pos="n" suppresses the colour legend. The former addcolorlabel="no" is not
# a corrplot() argument: it fell through `...` to text()/title() and only
# produced '"addcolorlabel" is not a graphical parameter' warnings on every call.
Red1 = corrplot(Red_cor1, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Red States")
Red2 = corrplot(Red_cor2, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Red States")
Blue1 = corrplot(Blue_cor1, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Blue States")
Blue2 = corrplot(Blue_cor2, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Blue States")
Swing1 = corrplot(Swing_cor1, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Swing States")
Swing2 = corrplot(Swing_cor2, method="shade", shade.col=NA, tl.col="black", tl.srt=45, addCoef.col="black", cl.pos="n", title="Correlation Swing States")
# Print the value corrplot() returned for the first red-group column set.
Red1
## Clinton Trump
## Clinton 1.0000000000 -0.97454405
## Trump -0.9745440481 1.00000000
## Obama 0.9518777225 -0.92517639
## Romney -0.9438171418 0.92263151
## population_change -0.0008282731 -0.04660033
## age65plus -0.3789883517 0.41369653
## White -0.7378736304 0.67726577
## Black 0.7051320888 -0.62306859
## Hispanic 0.1381746618 -0.13011204
## Edu_batchelors 0.1444483308 -0.23197460
## Housing Units in Multi-Unit Structures 0.4410465109 -0.49868421
## Obama Romney
## Clinton 0.95187772 -0.94381714
## Trump -0.92517639 0.92263151
## Obama 1.00000000 -0.99763351
## Romney -0.99763351 1.00000000
## population_change -0.07143798 0.06926463
## age65plus -0.28698264 0.27673943
## White -0.67174688 0.64601193
## Black 0.61854065 -0.59384754
## Hispanic 0.03879629 -0.02652863
## Edu_batchelors 0.01813029 -0.02790650
## Housing Units in Multi-Unit Structures 0.37285662 -0.38091848
## population_change age65plus
## Clinton -0.0008282731 -0.3789884
## Trump -0.0466003272 0.4136965
## Obama -0.0714379788 -0.2869826
## Romney 0.0692646298 0.2767394
## population_change 1.0000000000 -0.4263996
## age65plus -0.4263996223 1.0000000
## White 0.0824259634 0.3211914
## Black -0.1704546890 -0.2340054
## Hispanic 0.1502199469 -0.2361226
## Edu_batchelors 0.4217309267 -0.2105067
## Housing Units in Multi-Unit Structures 0.3144498390 -0.4693652
## White Black Hispanic
## Clinton -0.73787363 0.7051321 0.13817466
## Trump 0.67726577 -0.6230686 -0.13011204
## Obama -0.67174688 0.6185407 0.03879629
## Romney 0.64601193 -0.5938475 -0.02652863
## population_change 0.08242596 -0.1704547 0.15021995
## age65plus 0.32119141 -0.2340054 -0.23612255
## White 1.00000000 -0.8865513 0.13658735
## Black -0.88655127 1.0000000 -0.13612422
## Hispanic 0.13658735 -0.1361242 1.00000000
## Edu_batchelors 0.05997594 -0.1035870 -0.02227690
## Housing Units in Multi-Unit Structures -0.17539997 0.1517083 0.04081684
## Edu_batchelors
## Clinton 0.14444833
## Trump -0.23197460
## Obama 0.01813029
## Romney -0.02790650
## population_change 0.42173093
## age65plus -0.21050672
## White 0.05997594
## Black -0.10358702
## Hispanic -0.02227690
## Edu_batchelors 1.00000000
## Housing Units in Multi-Unit Structures 0.56789840
## Housing Units in Multi-Unit Structures
## Clinton 0.44104651
## Trump -0.49868421
## Obama 0.37285662
## Romney -0.38091848
## population_change 0.31444984
## age65plus -0.46936520
## White -0.17539997
## Black 0.15170825
## Hispanic 0.04081684
## Edu_batchelors 0.56789840
## Housing Units in Multi-Unit Structures 1.00000000
# Print the value corrplot() returned for the second red-group column set.
Red2
## Clinton Trump
## Clinton 1.0000000000 -0.974515165
## Trump -0.9745151649 1.000000000
## Median Value of Owner-Occupied Housing Units 0.0610092305 -0.155044672
## Income -0.2124907760 0.158887425
## Building Permits 0.1922322822 -0.213255026
## Density 0.3481460922 -0.361739767
## Evangelical 0.1128595894 0.004316957
## Protestant -0.1618435569 0.148459789
## Catholic -0.0002087637 -0.018311267
## Mormon -0.0930669961 -0.026693305
## Christian 0.0779975374 0.008299153
## Median Value of Owner-Occupied Housing Units
## Clinton 0.06100923
## Trump -0.15504467
## Median Value of Owner-Occupied Housing Units 1.00000000
## Income 0.54173767
## Building Permits 0.23435493
## Density 0.28197037
## Evangelical -0.33300854
## Protestant -0.16682105
## Catholic 0.01057547
## Mormon 0.19343391
## Christian -0.36374188
## Income Building Permits
## Clinton -0.21249078 0.192232282
## Trump 0.15888743 -0.213255026
## Median Value of Owner-Occupied Housing Units 0.54173767 0.234354925
## Income 1.00000000 0.222119730
## Building Permits 0.22211973 1.000000000
## Density 0.21786151 0.540524116
## Evangelical -0.30707663 -0.093813491
## Protestant 0.34503330 -0.056483009
## Catholic 0.31091343 0.027574507
## Mormon -0.02879495 0.001536522
## Christian 0.05798660 -0.087482416
## Density Evangelical
## Clinton 0.34814609 0.112859589
## Trump -0.36173977 0.004316957
## Median Value of Owner-Occupied Housing Units 0.28197037 -0.333008541
## Income 0.21786151 -0.307076633
## Building Permits 0.54052412 -0.093813491
## Density 1.00000000 -0.083974026
## Evangelical -0.08397403 1.000000000
## Protestant -0.11856447 -0.129093351
## Catholic 0.03755652 -0.333344857
## Mormon -0.03292587 -0.231137536
## Christian -0.09244637 0.634474140
## Protestant Catholic
## Clinton -0.16184356 -0.0002087637
## Trump 0.14845979 -0.0183112670
## Median Value of Owner-Occupied Housing Units -0.16682105 0.0105754667
## Income 0.34503330 0.3109134344
## Building Permits -0.05648301 0.0275745067
## Density -0.11856447 0.0375565232
## Evangelical -0.12909335 -0.3333448574
## Protestant 1.00000000 0.2570057154
## Catholic 0.25700572 1.0000000000
## Mormon -0.11371929 -0.0619180298
## Christian 0.54605106 0.3155763655
## Mormon Christian
## Clinton -0.093066996 0.077997537
## Trump -0.026693305 0.008299153
## Median Value of Owner-Occupied Housing Units 0.193433915 -0.363741880
## Income -0.028794946 0.057986604
## Building Permits 0.001536522 -0.087482416
## Density -0.032925873 -0.092446368
## Evangelical -0.231137536 0.634474140
## Protestant -0.113719289 0.546051061
## Catholic -0.061918030 0.315576366
## Mormon 1.000000000 -0.285805773
## Christian -0.285805773 1.000000000
# Print the value corrplot() returned for the first blue-group column set.
Blue1
## Clinton Trump Obama
## Clinton 1.0000000 -0.9918399 0.9481736
## Trump -0.9918399 1.0000000 -0.9421126
## Obama 0.9481736 -0.9421126 1.0000000
## Romney -0.9468479 0.9467359 -0.9980754
## population_change 0.5269019 -0.5156305 0.3801360
## age65plus -0.3944511 0.3726769 -0.3131466
## White -0.6003950 0.5788362 -0.5549216
## Black 0.4210651 -0.3574980 0.4049580
## Hispanic 0.2843771 -0.3213727 0.1910168
## Edu_batchelors 0.6805001 -0.6651660 0.5388131
## Housing Units in Multi-Unit Structures 0.6713731 -0.6272824 0.6161323
## Romney population_change
## Clinton -0.9468479 0.5269019
## Trump 0.9467359 -0.5156305
## Obama -0.9980754 0.3801360
## Romney 1.0000000 -0.3737910
## population_change -0.3737910 1.0000000
## age65plus 0.2991700 -0.5423292
## White 0.5471660 -0.4371531
## Black -0.3820461 0.2140206
## Hispanic -0.2064690 0.2165409
## Edu_batchelors -0.5339278 0.4866104
## Housing Units in Multi-Unit Structures -0.5993005 0.4412370
## age65plus White Black
## Clinton -0.3944511 -0.6003950 0.421065076
## Trump 0.3726769 0.5788362 -0.357497951
## Obama -0.3131466 -0.5549216 0.404958047
## Romney 0.2991700 0.5471660 -0.382046062
## population_change -0.5423292 -0.4371531 0.214020639
## age65plus 1.0000000 0.4273832 -0.354017194
## White 0.4273832 1.0000000 -0.683284217
## Black -0.3540172 -0.6832842 1.000000000
## Hispanic -0.2954593 -0.1764290 -0.004243443
## Edu_batchelors -0.2737985 -0.3099415 0.207935685
## Housing Units in Multi-Unit Structures -0.4738699 -0.5381249 0.475199861
## Hispanic Edu_batchelors
## Clinton 0.284377061 0.68050014
## Trump -0.321372712 -0.66516605
## Obama 0.191016758 0.53881314
## Romney -0.206469029 -0.53392783
## population_change 0.216540879 0.48661041
## age65plus -0.295459296 -0.27379855
## White -0.176428961 -0.30994146
## Black -0.004243443 0.20793569
## Hispanic 1.000000000 -0.08052484
## Edu_batchelors -0.080524835 1.00000000
## Housing Units in Multi-Unit Structures 0.098611945 0.56518678
## Housing Units in Multi-Unit Structures
## Clinton 0.67137314
## Trump -0.62728237
## Obama 0.61613233
## Romney -0.59930050
## population_change 0.44123700
## age65plus -0.47386994
## White -0.53812491
## Black 0.47519986
## Hispanic 0.09861195
## Edu_batchelors 0.56518678
## Housing Units in Multi-Unit Structures 1.00000000
# Print the value corrplot() returned for the second blue-group column set.
Blue2
## Clinton Trump
## Clinton 1.0000000 -0.99183992
## Trump -0.9918399 1.00000000
## Median Value of Owner-Occupied Housing Units 0.6740587 -0.64892088
## Income 0.4979443 -0.45928947
## Building Permits 0.4425732 -0.41828456
## Density 0.3690518 -0.33463697
## Evangelical -0.5309847 0.54567940
## Protestant -0.3586681 0.37904502
## Catholic 0.2162151 -0.20294355
## Mormon -0.1143945 0.07901817
## Christian -0.3495691 0.38300046
## Median Value of Owner-Occupied Housing Units
## Clinton 0.67405870
## Trump -0.64892088
## Median Value of Owner-Occupied Housing Units 1.00000000
## Income 0.75292542
## Building Permits 0.43977266
## Density 0.40532727
## Evangelical -0.44951518
## Protestant -0.34503358
## Catholic 0.12911329
## Mormon -0.04495867
## Christian -0.36698841
## Income Building Permits
## Clinton 0.49794428 0.44257316
## Trump -0.45928947 -0.41828456
## Median Value of Owner-Occupied Housing Units 0.75292542 0.43977266
## Income 1.00000000 0.32052556
## Building Permits 0.32052556 1.00000000
## Density 0.28644014 0.35294770
## Evangelical -0.35655004 -0.17451973
## Protestant -0.01468794 -0.20886607
## Catholic 0.30976489 0.02966670
## Mormon -0.15407197 -0.06221267
## Christian -0.01613330 -0.17749160
## Density Evangelical
## Clinton 0.369051753 -0.530984650
## Trump -0.334636975 0.545679402
## Median Value of Owner-Occupied Housing Units 0.405327268 -0.449515182
## Income 0.286440140 -0.356550045
## Building Permits 0.352947702 -0.174519734
## Density 1.000000000 -0.115012485
## Evangelical -0.115012485 1.000000000
## Protestant -0.128793888 0.245035603
## Catholic 0.008292259 -0.321400482
## Mormon -0.073912357 -0.002749382
## Christian -0.112337707 0.575112737
## Protestant Catholic
## Clinton -0.35866808 0.216215148
## Trump 0.37904502 -0.202943554
## Median Value of Owner-Occupied Housing Units -0.34503358 0.129113291
## Income -0.01468794 0.309764891
## Building Permits -0.20886607 0.029666701
## Density -0.12879389 0.008292259
## Evangelical 0.24503560 -0.321400482
## Protestant 1.00000000 -0.068016889
## Catholic -0.06801689 1.000000000
## Mormon -0.10760836 -0.181057304
## Christian 0.59981263 0.446901351
## Mormon Christian
## Clinton -0.114394545 -0.3495691
## Trump 0.079018170 0.3830005
## Median Value of Owner-Occupied Housing Units -0.044958671 -0.3669884
## Income -0.154071973 -0.0161333
## Building Permits -0.062212666 -0.1774916
## Density -0.073912357 -0.1123377
## Evangelical -0.002749382 0.5751127
## Protestant -0.107608358 0.5998126
## Catholic -0.181057304 0.4469014
## Mormon 1.000000000 -0.1908860
## Christian -0.190885997 1.0000000
# Print the value corrplot() returned for the first swing-group column set.
Swing1
## Clinton Trump Obama
## Clinton 1.0000000 -0.9892751 0.90576185
## Trump -0.9892751 1.0000000 -0.91029514
## Obama 0.9057618 -0.9102951 1.00000000
## Romney -0.8991358 0.9092335 -0.99810214
## population_change 0.3278722 -0.3409298 0.10787016
## age65plus -0.3190696 0.3443082 -0.21992734
## White -0.5912501 0.5226715 -0.42897573
## Black 0.5060647 -0.4248166 0.35780779
## Hispanic 0.2120297 -0.2246970 0.09593826
## Edu_batchelors 0.5456011 -0.5878235 0.32076008
## Housing Units in Multi-Unit Structures 0.5976703 -0.6187282 0.47124876
## Romney population_change
## Clinton -0.8991358 0.32787221
## Trump 0.9092335 -0.34092983
## Obama -0.9981021 0.10787016
## Romney 1.0000000 -0.10604488
## population_change -0.1060449 1.00000000
## age65plus 0.2233397 -0.37098491
## White 0.4100177 -0.13914053
## Black -0.3354549 0.03650365
## Hispanic -0.1018363 0.27500009
## Edu_batchelors -0.3266181 0.60398889
## Housing Units in Multi-Unit Structures -0.4737638 0.44690743
## age65plus White Black
## Clinton -0.3190696 -0.59125006 0.50606467
## Trump 0.3443082 0.52267152 -0.42481657
## Obama -0.2199273 -0.42897573 0.35780779
## Romney 0.2233397 0.41001768 -0.33545490
## population_change -0.3709849 -0.13914053 0.03650365
## age65plus 1.0000000 0.24101868 -0.17398099
## White 0.2410187 1.00000000 -0.90880670
## Black -0.1739810 -0.90880670 1.00000000
## Hispanic -0.1773258 -0.11917949 0.04269540
## Edu_batchelors -0.3308759 -0.04486616 -0.03874371
## Housing Units in Multi-Unit Structures -0.4489890 -0.16534998 0.11930294
## Hispanic Edu_batchelors
## Clinton 0.21202972 0.54560111
## Trump -0.22469699 -0.58782349
## Obama 0.09593826 0.32076008
## Romney -0.10183626 -0.32661812
## population_change 0.27500009 0.60398889
## age65plus -0.17732582 -0.33087594
## White -0.11917949 -0.04486616
## Black 0.04269540 -0.03874371
## Hispanic 1.00000000 0.08983758
## Edu_batchelors 0.08983758 1.00000000
## Housing Units in Multi-Unit Structures 0.19953222 0.61690438
## Housing Units in Multi-Unit Structures
## Clinton 0.5976703
## Trump -0.6187282
## Obama 0.4712488
## Romney -0.4737638
## population_change 0.4469074
## age65plus -0.4489890
## White -0.1653500
## Black 0.1193029
## Hispanic 0.1995322
## Edu_batchelors 0.6169044
## Housing Units in Multi-Unit Structures 1.0000000
# Print the value corrplot() returned for the second swing-group column set.
Swing2
## Clinton Trump
## Clinton 1.000000000 -0.989300933
## Trump -0.989300933 1.000000000
## Median Value of Owner-Occupied Housing Units 0.402785367 -0.434275030
## Income 0.284544153 -0.331124057
## Building Permits 0.299642444 -0.290135133
## Density 0.488268321 -0.478424811
## Evangelical -0.026784760 0.091844644
## Protestant -0.101535213 0.068936886
## Catholic 0.108388537 -0.147416241
## Mormon -0.023343074 0.001023867
## Christian 0.009763309 -0.002181567
## Median Value of Owner-Occupied Housing Units
## Clinton 0.40278537
## Trump -0.43427503
## Median Value of Owner-Occupied Housing Units 1.00000000
## Income 0.79667459
## Building Permits 0.21334092
## Density 0.36844217
## Evangelical -0.13990840
## Protestant -0.17653233
## Catholic 0.02430069
## Mormon 0.01655113
## Christian -0.18403348
## Income Building Permits
## Clinton 0.28454415 0.299642444
## Trump -0.33112406 -0.290135133
## Median Value of Owner-Occupied Housing Units 0.79667459 0.213340916
## Income 1.00000000 0.253087892
## Building Permits 0.25308789 1.000000000
## Density 0.32256662 0.269280687
## Evangelical -0.26953397 -0.135179848
## Protestant 0.08096759 -0.164962871
## Catholic 0.23383761 0.018358020
## Mormon -0.05321687 0.001369272
## Christian -0.01562642 -0.168045842
## Density Evangelical
## Clinton 0.48826832 -0.02678476
## Trump -0.47842481 0.09184464
## Median Value of Owner-Occupied Housing Units 0.36844217 -0.13990840
## Income 0.32256662 -0.26953397
## Building Permits 0.26928069 -0.13517985
## Density 1.00000000 -0.01801773
## Evangelical -0.01801773 1.00000000
## Protestant -0.06292108 0.03454216
## Catholic 0.09371452 -0.35160241
## Mormon -0.04378564 -0.05221740
## Christian 0.02012994 0.51367896
## Protestant Catholic
## Clinton -0.10153521 0.10838854
## Trump 0.06893689 -0.14741624
## Median Value of Owner-Occupied Housing Units -0.17653233 0.02430069
## Income 0.08096759 0.23383761
## Building Permits -0.16496287 0.01835802
## Density -0.06292108 0.09371452
## Evangelical 0.03454216 -0.35160241
## Protestant 1.00000000 0.26213572
## Catholic 0.26213572 1.00000000
## Mormon -0.12657358 -0.02562251
## Christian 0.74227862 0.42261901
## Mormon Christian
## Clinton -0.023343074 0.009763309
## Trump 0.001023867 -0.002181567
## Median Value of Owner-Occupied Housing Units 0.016551130 -0.184033481
## Income -0.053216873 -0.015626422
## Building Permits 0.001369272 -0.168045842
## Density -0.043785640 0.020129944
## Evangelical -0.052217401 0.513678964
## Protestant -0.126573577 0.742278624
## Catholic -0.025622510 0.422619012
## Mormon 1.000000000 -0.124979431
## Christian -0.124979431 1.000000000
# Build the nine modelling tables: {Red, Blue, Swing} states crossed with the
# {Clinton, Trump, shift} targets. All nine use the same predictor columns, so
# the positions are written once here instead of being repeated in every call
# (nine hand-copied index vectors are easy to let drift apart).
# Column order matters downstream:
#   * Clinton/Trump tables: target first (column 8 or 9), then the shared
#     predictors, then column 100 -> 51 columns, target in position 1.
#   * Shift tables: shared predictors, then column 98, then column 100
#     -> 51 columns, with column 98 in position 50.
shared_predictor_cols = c(19, 20, 26:28, 30:35, 40, 42:63, 67:76, 83:85, 87, 89)
clinton_table_cols = c(8, shared_predictor_cols, 100)
trump_table_cols = c(9, shared_predictor_cols, 100)
shift_table_cols = c(shared_predictor_cols, 98, 100)
Red_Features_clinton = RedStates[, clinton_table_cols]
Red_Features_Trump = RedStates[, trump_table_cols]
Red_Features_Shift = RedStates[, shift_table_cols]
Blue_Features_clinton = BlueStates[, clinton_table_cols]
Blue_Features_Trump = BlueStates[, trump_table_cols]
Blue_Features_Shift = BlueStates[, shift_table_cols]
Swing_Features_clinton = SwingStates[, clinton_table_cols]
Swing_Features_Trump = SwingStates[, trump_table_cols]
Swing_Features_Shift = SwingStates[, shift_table_cols]
# Attach the h2o client, then connect to an H2O cluster with the requested
# thread and memory settings. In the transcript below the call attaches to a
# cluster that was already running (uptime over an hour, 4.93 GB total memory).
library(h2o)
h2o.init(max_mem_size = "6G", nthreads = -1)
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 1 hours 11 minutes
## H2O cluster version: 3.10.4.6
## H2O cluster version age: 2 months and 7 days
## H2O cluster name: H2O_started_from_R_onest_xjv369
## H2O cluster total nodes: 1
## H2O cluster total memory: 4.93 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 3.3.2 (2016-10-31)
# Upload the red-state Clinton feature table to the H2O cluster.
red.Clinton = as.h2o(x = Red_Features_clinton)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest regression on the red-state Clinton frame. The target is the
# frame's first column; the predictors are "every column except the first"
# rather than a fixed 2:51 span, so the list stays correct if the feature
# table gains or loses columns.
vars.Clinton = colnames(red.Clinton)
y_var.Clinton = vars.Clinton[1]
x_vars.Clinton = vars.Clinton[-1]
# stopping_rounds = 2 with per-tree scoring permits early stopping, which is
# why the summary printed below reports 16 trees rather than the 200 requested.
Clinton_features = h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = red.Clinton,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
##
|
| | 0%
|
|================================================ | 74%
|
|=================================================================| 100%
# Print the fitted model's summary: model size, out-of-bag training metrics,
# 10-fold cross-validation metrics, scoring history and variable importances.
summary(Clinton_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_16
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 16 16 223335 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1076 1145 1105.62500
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 7.696872
## RMSE: 2.774324
## MAE: 2.055538
## RMSLE: 0.1196493
## Mean Residual Deviance : 7.696872
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 6.281383
## RMSE: 2.506269
## MAE: 1.83702
## RMSLE: 0.1095244
## Mean Residual Deviance : 6.281383
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid
## mae 1.8355962 0.1141399 1.6837574 2.1626139
## mse 6.2755084 0.97351587 4.92367 8.635242
## r2 0.9686772 0.0047762212 0.9764076 0.9572694
## residual_deviance 6.2755084 0.97351587 4.92367 8.635242
## rmse 2.490159 0.19315411 2.2189345 2.9385784
## rmsle 0.109176315 0.0054995855 0.09629298 0.12078045
## cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae 1.8410962 1.6558785 1.9910244 1.7926586 1.9807729
## mse 6.09917 4.9993973 7.562722 6.7656913 8.041615
## r2 0.97353154 0.96766365 0.96832085 0.96336627 0.964731
## residual_deviance 6.09917 4.9993973 7.562722 6.7656913 8.041615
## rmse 2.4696498 2.2359333 2.7500403 2.6010942 2.8357742
## rmsle 0.118361875 0.10802966 0.11506413 0.10264545 0.11514403
## cv_8_valid cv_9_valid cv_10_valid
## mae 1.8185035 1.8189781 1.6106786
## mse 6.213245 5.1234756 4.390857
## r2 0.9634585 0.9703982 0.98162526
## residual_deviance 6.213245 5.1234756 4.390857
## rmse 2.492638 2.2635095 2.0954373
## rmsle 0.09909064 0.109159976 0.107193954
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:03 4.864 sec 0
## 2 2017-07-04 13:51:03 4.903 sec 1 4.31247
## 3 2017-07-04 13:51:03 4.942 sec 2 4.13126
## 4 2017-07-04 13:51:03 4.979 sec 3 3.87425
## 5 2017-07-04 13:51:03 5.017 sec 4 3.77081
## 6 2017-07-04 13:51:03 5.055 sec 5 3.57081
## 7 2017-07-04 13:51:03 5.093 sec 6 3.45167
## 8 2017-07-04 13:51:03 5.139 sec 7 3.27751
## 9 2017-07-04 13:51:03 5.178 sec 8 3.23221
## 10 2017-07-04 13:51:03 5.221 sec 9 3.22226
## 11 2017-07-04 13:51:03 5.259 sec 10 3.11909
## 12 2017-07-04 13:51:03 5.298 sec 11 3.08332
## 13 2017-07-04 13:51:03 5.340 sec 12 2.98879
## 14 2017-07-04 13:51:03 5.379 sec 13 2.93352
## 15 2017-07-04 13:51:03 5.420 sec 14 2.88765
## 16 2017-07-04 13:51:03 5.459 sec 15 2.82578
## 17 2017-07-04 13:51:03 5.499 sec 16 2.77432
## training_mae training_deviance
## 1
## 2 3.06495 18.59741
## 3 2.93380 17.06727
## 4 2.78295 15.00984
## 5 2.75611 14.21899
## 6 2.62603 12.75065
## 7 2.54368 11.91402
## 8 2.43841 10.74207
## 9 2.37809 10.44719
## 10 2.35941 10.38293
## 11 2.29128 9.72875
## 12 2.26552 9.50687
## 13 2.20734 8.93289
## 14 2.15999 8.60552
## 15 2.11868 8.33852
## 16 2.08613 7.98506
## 17 2.05554 7.69687
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance scaled_importance percentage
## 1 Obama 2573658.750000 1.000000 0.554100
## 2 Romney 789922.812500 0.306926 0.170068
## 3 White 386090.875000 0.150016 0.083124
## 4 Black 239763.156250 0.093160 0.051620
## 5 Black-Owned Firms 199841.468750 0.077649 0.043025
##
## ---
## variable relative_importance
## 45 population2010 2981.910645
## 46 % Change - Private Nonfarm Employment 2620.126709
## 47 Women 2432.234863
## 48 Building Permits 2127.586670
## 49 Mormon 1653.549561
## 50 Manufacturers Shipments - 2007 1473.427246
## scaled_importance percentage
## 45 0.001159 0.000642
## 46 0.001018 0.000564
## 47 0.000945 0.000524
## 48 0.000827 0.000458
## 49 0.000642 0.000356
## 50 0.000573 0.000317
# Show the first 20 rows of the variable-importance table, which is printed
# in descending order of relative importance.
h2o.varimp(Clinton_features)[seq_len(20), ]
## Variable Importances:
## variable relative_importance
## 1 Obama 2573658.750000
## 2 Romney 789922.812500
## 3 White 386090.875000
## 4 Black 239763.156250
## 5 Black-Owned Firms 199841.468750
## 6 Homeownership Rate 59751.312500
## 7 % Female 2014 59501.601562
## 8 Edu_batchelors 23063.269531
## 9 Households 22870.996094
## 10 Income 19576.031250
## 11 Foreign Born 18316.865234
## 12 Density 16544.300781
## 13 Nonemployer Establishments - 2013 14730.364258
## 14 Hispanic 14423.869141
## 15 Housing Units in Multi-Unit Structures 13085.135742
## 16 Veterans 12188.005859
## 17 NonEnglish 11087.466797
## 18 age65plus 10934.945312
## 19 Median Household Income 10572.824219
## 20 Housing Units 2014 9827.556641
## scaled_importance percentage
## 1 1.000000 0.554100
## 2 0.306926 0.170068
## 3 0.150016 0.083124
## 4 0.093160 0.051620
## 5 0.077649 0.043025
## 6 0.023216 0.012864
## 7 0.023119 0.012810
## 8 0.008961 0.004965
## 9 0.008887 0.004924
## 10 0.007606 0.004215
## 11 0.007117 0.003944
## 12 0.006428 0.003562
## 13 0.005724 0.003171
## 14 0.005604 0.003105
## 15 0.005084 0.002817
## 16 0.004736 0.002624
## 17 0.004308 0.002387
## 18 0.004249 0.002354
## 19 0.004108 0.002276
## 20 0.003819 0.002116
# Plot the variable importances of the red-state Clinton model, limited to
# 20 features.
h2o.varimp_plot(model = Clinton_features, num_of_features = 20)
# Upload the red-state Trump feature table to the H2O cluster.
red.Trump = as.h2o(x = Red_Features_Trump)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest regression on the red-state Trump frame. The target is the
# frame's first column; the predictors are "every column except the first"
# rather than a fixed 2:51 span, so the list stays correct if the feature
# table gains or loses columns.
vars.Trump = colnames(red.Trump)
y_var.Trump = vars.Trump[1]
x_vars.Trump = vars.Trump[-1]
# stopping_rounds = 2 with per-tree scoring permits early stopping, which is
# why the summary printed below reports 13 trees rather than the 200 requested.
Trump_features = h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = red.Trump,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
##
|
| | 0%
|
|========================================== | 64%
|
|=========================================================== | 91%
|
|=================================================================| 100%
# Print the fitted model's summary: model size, out-of-bag training metrics,
# 10-fold cross-validation metrics, scoring history and variable importances.
summary(Trump_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_17
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 13 13 178646 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1066 1111 1088.15380
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 12.46205
## RMSE: 3.530163
## MAE: 2.493103
## RMSLE: 0.06657728
## Mean Residual Deviance : 12.46205
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 9.481995
## RMSE: 3.079285
## MAE: 2.163804
## RMSLE: 0.06018314
## Mean Residual Deviance : 9.481995
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 2.1636562 0.10761768 2.1019683 2.1061199 2.432384
## mse 9.51925 1.5137854 8.617245 7.755696 10.458083
## r2 0.9525106 0.0053042234 0.95542735 0.9615022 0.9545914
## residual_deviance 9.51925 1.5137854 8.617245 7.755696 10.458083
## rmse 3.0678484 0.23190086 2.9355145 2.784905 3.233896
## rmsle 0.05940332 0.0074851457 0.05184572 0.05284996 0.06195238
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 2.0244572 2.3964448 2.0110521 2.3069484 1.981716
## mse 8.873459 14.25896 8.232698 12.732398 7.9788194
## r2 0.94354117 0.94064057 0.95439506 0.9433518 0.954677
## residual_deviance 8.873459 14.25896 8.232698 12.732398 7.9788194
## rmse 2.978835 3.7761037 2.869268 3.5682485 2.8246803
## rmsle 0.054029405 0.07046554 0.048746984 0.07363369 0.0791951
## cv_9_valid cv_10_valid
## mae 2.1530216 2.1224499
## mse 8.164184 8.12096
## r2 0.9520078 0.9649717
## residual_deviance 8.164184 8.12096
## rmse 2.8573036 2.8497298
## rmsle 0.049664658 0.051649746
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:10 4.335 sec 0
## 2 2017-07-04 13:51:10 4.376 sec 1 5.40441
## 3 2017-07-04 13:51:10 4.416 sec 2 4.86521
## 4 2017-07-04 13:51:10 4.456 sec 3 4.77314
## 5 2017-07-04 13:51:10 4.497 sec 4 4.43548
## 6 2017-07-04 13:51:10 4.545 sec 5 4.38092
## 7 2017-07-04 13:51:10 4.584 sec 6 4.17351
## 8 2017-07-04 13:51:10 4.627 sec 7 4.01017
## 9 2017-07-04 13:51:10 4.666 sec 8 3.90818
## 10 2017-07-04 13:51:10 4.707 sec 9 3.80337
## 11 2017-07-04 13:51:10 4.745 sec 10 3.76355
## 12 2017-07-04 13:51:10 4.785 sec 11 3.67885
## 13 2017-07-04 13:51:10 4.826 sec 12 3.60753
## 14 2017-07-04 13:51:10 4.866 sec 13 3.53016
## training_mae training_deviance
## 1
## 2 3.71802 29.20760
## 3 3.43668 23.67027
## 4 3.36211 22.78284
## 5 3.14665 19.67345
## 6 3.03613 19.19247
## 7 2.90393 17.41821
## 8 2.80249 16.08148
## 9 2.74589 15.27384
## 10 2.69627 14.46563
## 11 2.66658 14.16431
## 12 2.59878 13.53392
## 13 2.54445 13.01425
## 14 2.49310 12.46205
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance scaled_importance percentage
## 1 Obama 1888954.000000 1.000000 0.508296
## 2 Romney 832012.375000 0.440462 0.223885
## 3 White 190387.718750 0.100790 0.051231
## 4 Black-Owned Firms 162311.562500 0.085927 0.043676
## 5 Black 158985.765625 0.084166 0.042781
##
## ---
## variable relative_importance
## 45 Catholic 2947.209473
## 46 Retail Sales / Capita - 2007 2945.502686
## 47 % Change - Private Nonfarm Employment 2780.957764
## 48 Women 2684.436523
## 49 Households 1600.129517
## 50 Manufacturers Shipments - 2007 794.295288
## scaled_importance percentage
## 45 0.001560 0.000793
## 46 0.001559 0.000793
## 47 0.001472 0.000748
## 48 0.001421 0.000722
## 49 0.000847 0.000431
## 50 0.000420 0.000214
# Show the first 20 rows of the variable-importance table, which is printed
# in descending order of relative importance.
h2o.varimp(Trump_features)[seq_len(20), ]
## Variable Importances:
## variable relative_importance
## 1 Obama 1888954.000000
## 2 Romney 832012.375000
## 3 White 190387.718750
## 4 Black-Owned Firms 162311.562500
## 5 Black 158985.765625
## 6 Housing Units in Multi-Unit Structures 92385.445312
## 7 Edu_batchelors 27543.599609
## 8 Median Value of Owner-Occupied Housing Units 23571.136719
## 9 population2010 23484.205078
## 10 Nonemployer Establishments - 2013 18110.349609
## 11 Mormon 16443.634766
## 12 Accommodation and Food Service Sales - 2007 15685.311523
## 13 Foreign Born 14747.731445
## 14 Density 13801.258789
## 15 Hispanic 13292.035156
## 16 Evangelical 13254.381836
## 17 age65plus 12997.441406
## 18 Private Nonfarm Employment 12415.383789
## 19 Poverty 11684.790039
## 20 Persons Under 18 11233.022461
## scaled_importance percentage
## 1 1.000000 0.508296
## 2 0.440462 0.223885
## 3 0.100790 0.051231
## 4 0.085927 0.043676
## 5 0.084166 0.042781
## 6 0.048908 0.024860
## 7 0.014581 0.007412
## 8 0.012478 0.006343
## 9 0.012432 0.006319
## 10 0.009588 0.004873
## 11 0.008705 0.004425
## 12 0.008304 0.004221
## 13 0.007807 0.003968
## 14 0.007306 0.003714
## 15 0.007037 0.003577
## 16 0.007017 0.003567
## 17 0.006881 0.003497
## 18 0.006573 0.003341
## 19 0.006186 0.003144
## 20 0.005947 0.003023
# Plot the variable importances of the red-state Trump model, limited to
# 20 features.
h2o.varimp_plot(model = Trump_features, num_of_features = 20)
# Upload the red-state shift feature table to the H2O cluster.
red.Swing = as.h2o(x = Red_Features_Shift)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest regression on the red-state shift frame. The target sits in
# position 50 because Red_Features_Shift places source column 98 second to
# last. Predictors are every other column, derived from the target name
# instead of the hard-coded c(1:49, 51), so the list no longer encodes the
# frame's width.
vars.Swing = colnames(red.Swing)
y_var.Swing = vars.Swing[50]
x_vars.Swing = setdiff(vars.Swing, y_var.Swing)
# stopping_rounds = 2 with per-tree scoring permits early stopping, which is
# why the summary printed below reports 19 trees rather than the 200 requested.
Swing_features = h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = red.Swing,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
##
|
| | 0%
|
|= | 2%
|
|============================== | 47%
|
|================================================ | 74%
|
|=================================================================| 100%
# Print the fitted model's summary: model size, out-of-bag training metrics,
# 10-fold cross-validation metrics, scoring history and variable importances.
# RMSLE is reported as NaN for this model in the output below.
summary(Swing_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_18
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 19 19 265331 20
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 20.00000 1070 1149 1105.68420
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 23.24074
## RMSE: 4.820866
## MAE: 3.596102
## RMSLE: NaN
## Mean Residual Deviance : 23.24074
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 19.71987
## RMSE: 4.440706
## MAE: 3.331273
## RMSLE: NaN
## Mean Residual Deviance : 19.71987
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 3.3331819 0.15898156 3.0794082 3.5649335 3.2170181
## mse 19.765106 2.194617 17.525085 22.534449 18.386826
## r2 0.7810878 0.018874457 0.78234655 0.7453276 0.79909736
## residual_deviance 19.765106 2.194617 17.525085 22.534449 18.386826
## rmse 4.4322605 0.24512383 4.1862974 4.7470465 4.2879863
## rmsle 0.0 NaN NaN NaN NaN
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 3.3267343 3.3794236 3.4389255 3.7522597 3.105099
## mse 19.57111 20.494764 20.671776 26.309776 16.544827
## r2 0.77163637 0.8168954 0.76845205 0.75229067 0.83297205
## residual_deviance 19.57111 20.494764 20.671776 26.309776 16.544827
## rmse 4.4239244 4.5271144 4.5466223 5.129306 4.067533
## rmsle NaN NaN NaN NaN NaN
## cv_9_valid cv_10_valid
## mae 3.471935 2.9960814
## mse 20.926386 14.686062
## r2 0.7594977 0.7823626
## residual_deviance 20.926386 14.686062
## rmse 4.574537 3.8322399
## rmsle NaN NaN
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:18 6.313 sec 0
## 2 2017-07-04 13:51:18 6.351 sec 1 6.97728
## 3 2017-07-04 13:51:18 6.389 sec 2 7.09694
## 4 2017-07-04 13:51:18 6.426 sec 3 6.86900
## 5 2017-07-04 13:51:18 6.463 sec 4 6.45459
## 6 2017-07-04 13:51:18 6.503 sec 5 6.15812
## 7 2017-07-04 13:51:18 6.539 sec 6 5.86918
## 8 2017-07-04 13:51:18 6.577 sec 7 5.76196
## 9 2017-07-04 13:51:18 6.618 sec 8 5.57149
## 10 2017-07-04 13:51:19 6.656 sec 9 5.48650
## 11 2017-07-04 13:51:19 6.694 sec 10 5.33859
## 12 2017-07-04 13:51:19 6.732 sec 11 5.27095
## 13 2017-07-04 13:51:19 6.769 sec 12 5.21179
## 14 2017-07-04 13:51:19 6.808 sec 13 5.14405
## 15 2017-07-04 13:51:19 6.845 sec 14 5.07810
## 16 2017-07-04 13:51:19 6.886 sec 15 5.00606
## 17 2017-07-04 13:51:19 6.926 sec 16 4.95956
## 18 2017-07-04 13:51:19 6.968 sec 17 4.88909
## 19 2017-07-04 13:51:19 7.007 sec 18 4.86162
## 20 2017-07-04 13:51:19 7.048 sec 19 4.82087
## training_mae training_deviance
## 1
## 2 5.11179 48.68248
## 3 5.10788 50.36651
## 4 5.06503 47.18318
## 5 4.80430 41.66175
## 6 4.58940 37.92244
## 7 4.39774 34.44727
## 8 4.29509 33.20024
## 9 4.16718 31.04152
## 10 4.10427 30.10166
## 11 3.98283 28.50056
## 12 3.94564 27.78290
## 13 3.88276 27.16279
## 14 3.81960 26.46128
## 15 3.75647 25.78709
## 16 3.70947 25.06059
## 17 3.67816 24.59728
## 18 3.62575 23.90319
## 19 3.61080 23.63530
## 20 3.59610 23.24074
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance scaled_importance percentage
## 1 Foreign Born 260140.906250 1.000000 0.120291
## 2 Obama 147572.171875 0.567278 0.068239
## 3 Edu_batchelors 144654.000000 0.556060 0.066889
## 4 age65plus 142850.734375 0.549128 0.066055
## 5 Romney 142136.234375 0.546382 0.065725
##
## ---
## variable relative_importance
## 45 population2014 5878.897949
## 46 Hispanic-Owned Firms 5656.727539
## 47 Accommodation and Food Service Sales - 2007 4519.589355
## 48 Merchant Wholesaler Sales - 2007 4270.420410
## 49 Black-Owned Firms 3349.556885
## 50 Manufacturers Shipments - 2007 3252.891846
## scaled_importance percentage
## 45 0.022599 0.002718
## 46 0.021745 0.002616
## 47 0.017374 0.002090
## 48 0.016416 0.001975
## 49 0.012876 0.001549
## 50 0.012504 0.001504
# Show the first 20 rows of the variable-importance table, which is printed
# in descending order of relative importance.
h2o.varimp(Swing_features)[seq_len(20), ]
## Variable Importances:
## variable relative_importance
## 1 Foreign Born 260140.906250
## 2 Obama 147572.171875
## 3 Edu_batchelors 144654.000000
## 4 age65plus 142850.734375
## 5 Romney 142136.234375
## 6 Hispanic 124024.648438
## 7 Median Value of Owner-Occupied Housing Units 119557.585938
## 8 Black 112236.117188
## 9 Christian 85163.335938
## 10 White 82062.125000
## 11 Nonemployer Establishments - 2013 76253.500000
## 12 Building Permits 63448.925781
## 13 Evangelical 62427.707031
## 14 Total Number of Firms 54842.359375
## 15 Mormon 51562.949219
## 16 NonEnglish 37711.167969
## 17 Edu_highschool 33791.375000
## 18 Persons Under 18 29549.197266
## 19 Persons/Household 28367.781250
## 20 Persons Under 5 22195.193359
## scaled_importance percentage
## 1 1.000000 0.120291
## 2 0.567278 0.068239
## 3 0.556060 0.066889
## 4 0.549128 0.066055
## 5 0.546382 0.065725
## 6 0.476760 0.057350
## 7 0.459588 0.055284
## 8 0.431444 0.051899
## 9 0.327374 0.039380
## 10 0.315453 0.037946
## 11 0.293124 0.035260
## 12 0.243902 0.029339
## 13 0.239977 0.028867
## 14 0.210818 0.025360
## 15 0.198212 0.023843
## 16 0.144964 0.017438
## 17 0.129896 0.015625
## 18 0.113589 0.013664
## 19 0.109048 0.013118
## 20 0.085320 0.010263
# Plot the variable importances of the red-state shift model, limited to
# 20 features.
h2o.varimp_plot(model = Swing_features, num_of_features = 20)
# Upload the blue-state Clinton feature table to the H2O cluster.
blue.Clinton = as.h2o(x = Blue_Features_clinton)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest regression on the blue-state Clinton frame. This reuses the
# names vars.Clinton / x_vars.Clinton / y_var.Clinton / Clinton_features, so
# the red-state objects of the same names are replaced from here on.
# The target is the frame's first column; the predictors are "every column
# except the first" rather than a fixed 2:51 span, so the list stays correct
# if the feature table gains or loses columns.
vars.Clinton = colnames(blue.Clinton)
y_var.Clinton = vars.Clinton[1]
x_vars.Clinton = vars.Clinton[-1]
# stopping_rounds = 2 with per-tree scoring permits early stopping, which is
# why the summary printed below reports 11 trees rather than the 200 requested.
Clinton_features = h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = blue.Clinton,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
##
|
| | 0%
|
|========================================== | 64%
|
|=================================================================| 100%
# Print the fitted model's summary: model size, out-of-bag training metrics,
# 10-fold cross-validation metrics, scoring history and variable importances.
summary(Clinton_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_19
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 11 11 37623 13
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 18 15.09091 258 280 267.09090
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 17.86892
## RMSE: 4.227164
## MAE: 3.093524
## RMSLE: 0.1000788
## Mean Residual Deviance : 17.86892
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 12.65615
## RMSE: 3.557548
## MAE: 2.716101
## RMSLE: 0.08515592
## Mean Residual Deviance : 12.65615
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 2.7365935 0.27456042 2.4079978 2.6331956 2.1940808
## mse 12.756155 3.0731835 10.893368 14.223735 7.222359
## r2 0.9489598 0.011713537 0.94676566 0.9416267 0.9582436
## residual_deviance 12.756155 3.0731835 10.893368 14.223735 7.222359
## rmse 3.5198364 0.42831442 3.3005102 3.7714367 2.6874447
## rmsle 0.084931456 0.006937669 0.07796885 0.08770437 0.06532815
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 3.2233045 2.3888707 2.8357618 2.9102492 2.2985606
## mse 15.218432 8.519886 11.42654 14.744233 7.315518
## r2 0.9361236 0.96321857 0.9648951 0.9506188 0.9763101
## residual_deviance 15.218432 8.519886 11.42654 14.744233 7.315518
## rmse 3.9010808 2.9188843 3.3803167 3.839822 2.7047215
## rmsle 0.0899637 0.088544466 0.09011477 0.08825237 0.07405125
## cv_9_valid cv_10_valid
## mae 3.1906219 3.283291
## mse 16.226276 21.771202
## r2 0.9349227 0.9168737
## residual_deviance 16.226276 21.771202
## rmse 4.0281854 4.665962
## rmsle 0.08436655 0.10302007
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:21 1.026 sec 0
## 2 2017-07-04 13:51:21 1.041 sec 1 7.20274
## 3 2017-07-04 13:51:21 1.053 sec 2 6.16725
## 4 2017-07-04 13:51:21 1.065 sec 3 5.54210
## 5 2017-07-04 13:51:21 1.078 sec 4 5.07667
## 6 2017-07-04 13:51:21 1.090 sec 5 4.76479
## 7 2017-07-04 13:51:21 1.101 sec 6 4.49074
## 8 2017-07-04 13:51:21 1.113 sec 7 4.30113
## 9 2017-07-04 13:51:21 1.124 sec 8 4.56498
## 10 2017-07-04 13:51:21 1.135 sec 9 4.31874
## 11 2017-07-04 13:51:21 1.148 sec 10 4.57054
## 12 2017-07-04 13:51:21 1.160 sec 11 4.22716
## training_mae training_deviance
## 1
## 2 5.38390 51.87941
## 3 4.68282 38.03495
## 4 4.26325 30.71489
## 5 3.93716 25.77258
## 6 3.67798 22.70322
## 7 3.44672 20.16675
## 8 3.26608 18.49973
## 9 3.35753 20.83906
## 10 3.20815 18.65151
## 11 3.23677 20.88984
## 12 3.09352 17.86892
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Obama 521138.156250
## 2 Edu_batchelors 143678.546875
## 3 Romney 97947.765625
## 4 Housing Units in Multi-Unit Structures 42949.753906
## 5 Housing Units 2014 39517.859375
## scaled_importance percentage
## 1 1.000000 0.534021
## 2 0.275701 0.147230
## 3 0.187950 0.100369
## 4 0.082415 0.044011
## 5 0.075830 0.040495
##
## ---
## variable relative_importance
## 45 Households 495.466919
## 46 % Change - Private Nonfarm Employment 488.615082
## 47 Building Permits 439.591797
## 48 Living in Same House 1+ Years 420.951080
## 49 Median Household Income 386.190063
## 50 population2010 260.440430
## scaled_importance percentage
## 45 0.000951 0.000508
## 46 0.000938 0.000501
## 47 0.000844 0.000450
## 48 0.000808 0.000431
## 49 0.000741 0.000396
## 50 0.000500 0.000267
# Show the first 20 rows of the variable-importance table, which is printed
# in descending order of relative importance.
h2o.varimp(Clinton_features)[seq_len(20), ]
## Variable Importances:
## variable relative_importance
## 1 Obama 521138.156250
## 2 Edu_batchelors 143678.546875
## 3 Romney 97947.765625
## 4 Housing Units in Multi-Unit Structures 42949.753906
## 5 Housing Units 2014 39517.859375
## 6 Foreign Born 21875.576172
## 7 NonEnglish 16887.023438
## 8 Median Value of Owner-Occupied Housing Units 16468.039062
## 9 Density 9315.324219
## 10 Black-Owned Firms 5275.490723
## 11 Nonemployer Establishments - 2013 4924.349609
## 12 Accommodation and Food Service Sales - 2007 4899.251465
## 13 Poverty 4471.787598
## 14 Homeownership Rate 3373.303711
## 15 Persons/Household 2891.104492
## 16 Evangelical 2808.970703
## 17 Protestant 2415.911621
## 18 White 2267.387939
## 19 Manufacturers Shipments - 2007 2228.818604
## 20 Retail Sales - 2007 2164.006836
## scaled_importance percentage
## 1 1.000000 0.534021
## 2 0.275701 0.147230
## 3 0.187950 0.100369
## 4 0.082415 0.044011
## 5 0.075830 0.040495
## 6 0.041977 0.022416
## 7 0.032404 0.017304
## 8 0.031600 0.016875
## 9 0.017875 0.009546
## 10 0.010123 0.005406
## 11 0.009449 0.005046
## 12 0.009401 0.005020
## 13 0.008581 0.004582
## 14 0.006473 0.003457
## 15 0.005548 0.002963
## 16 0.005390 0.002878
## 17 0.004636 0.002476
## 18 0.004351 0.002323
## 19 0.004277 0.002284
## 20 0.004152 0.002218
# Plot the variable importances of the blue-state Clinton model, limited to
# 20 features.
h2o.varimp_plot(model = Clinton_features, num_of_features = 20)
# Upload the blue-state Trump feature table to the H2O cluster.
blue.Trump = as.h2o(x = Blue_Features_Trump)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest regression on the blue-state Trump frame. This reuses the
# names vars.Trump / x_vars.Trump / y_var.Trump / Trump_features, so the
# red-state objects of the same names are replaced from here on.
# The target is the frame's first column; the predictors are "every column
# except the first" rather than a fixed 2:51 span, so the list stays correct
# if the feature table gains or loses columns.
vars.Trump = colnames(blue.Trump)
y_var.Trump = vars.Trump[1]
x_vars.Trump = vars.Trump[-1]
# stopping_rounds = 2 with per-tree scoring permits early stopping, which is
# why the summary printed below reports 15 trees rather than the 200 requested.
Trump_features = h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = blue.Trump,
  ntrees = 200,
  stopping_rounds = 2,
  score_each_iteration = TRUE,
  nfolds = 10,
  seed = 123
)
##
|
| | 0%
|
|================================================ | 74%
|
|=================================================================| 100%
# Print the fitted model's summary: model size, out-of-bag training metrics,
# 10-fold cross-validation metrics, scoring history and variable importances.
summary(Trump_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_20
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 15 15 51240 13
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 16 14.93333 256 284 266.93332
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 16.72938
## RMSE: 4.090157
## MAE: 3.076985
## RMSLE: 0.1290486
## Mean Residual Deviance : 16.72938
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 14.90733
## RMSE: 3.861001
## MAE: 3.064429
## RMSLE: 0.1172027
## Mean Residual Deviance : 14.90733
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 3.0837953 0.28637564 2.7118068 2.824019 3.3638227
## mse 14.96513 2.1702669 11.829679 17.16653 16.693277
## r2 0.94103885 0.010828344 0.9446266 0.92939544 0.91053694
## residual_deviance 14.96513 2.1702669 11.829679 17.16653 16.693277
## rmse 3.8452232 0.29949012 3.43943 4.1432514 4.0857406
## rmsle 0.11109822 0.02232643 0.100389555 0.18709083 0.09374424
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 3.49672 2.319074 3.5025194 2.6726663 3.0889263
## mse 18.01212 8.360348 16.46241 11.838682 16.484095
## r2 0.92913544 0.96507454 0.95141697 0.9595432 0.94678384
## residual_deviance 18.01212 8.360348 16.46241 11.838682 16.484095
## rmse 4.2440686 2.8914266 4.0573897 3.4407387 4.060061
## rmsle 0.11449931 0.06198062 0.10806376 0.08483107 0.12588707
## cv_9_valid cv_10_valid
## mae 3.3352625 3.5231369
## mse 14.793237 18.010918
## r2 0.94081587 0.9330592
## residual_deviance 14.793237 18.010918
## rmse 3.8461976 4.243927
## rmsle 0.10381192 0.1306839
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:24 1.231 sec 0
## 2 2017-07-04 13:51:24 1.243 sec 1 7.70278
## 3 2017-07-04 13:51:24 1.253 sec 2 6.50961
## 4 2017-07-04 13:51:24 1.265 sec 3 5.92579
## 5 2017-07-04 13:51:24 1.275 sec 4 5.78929
## 6 2017-07-04 13:51:24 1.286 sec 5 5.26220
## 7 2017-07-04 13:51:24 1.296 sec 6 5.15896
## 8 2017-07-04 13:51:24 1.308 sec 7 5.00656
## 9 2017-07-04 13:51:24 1.319 sec 8 5.11579
## 10 2017-07-04 13:51:24 1.331 sec 9 4.77839
## 11 2017-07-04 13:51:24 1.342 sec 10 4.56240
## 12 2017-07-04 13:51:24 1.354 sec 11 4.42896
## 13 2017-07-04 13:51:24 1.364 sec 12 4.36396
## 14 2017-07-04 13:51:24 1.376 sec 13 4.20601
## 15 2017-07-04 13:51:24 1.386 sec 14 4.22847
## 16 2017-07-04 13:51:24 1.397 sec 15 4.09016
## training_mae training_deviance
## 1
## 2 5.66128 59.33281
## 3 4.93430 42.37508
## 4 4.60441 35.11494
## 5 4.45181 33.51584
## 6 4.13173 27.69074
## 7 4.03389 26.61488
## 8 3.82239 25.06560
## 9 3.76684 26.17127
## 10 3.61586 22.83302
## 11 3.48712 20.81553
## 12 3.38438 19.61569
## 13 3.35582 19.04417
## 14 3.19982 17.69053
## 15 3.19883 17.87997
## 16 3.07698 16.72938
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Obama 552152.312500
## 2 Romney 319621.687500
## 3 Edu_batchelors 141846.843750
## 4 Housing Units in Multi-Unit Structures 79966.773438
## 5 Foreign Born 67501.562500
## scaled_importance percentage
## 1 1.000000 0.409437
## 2 0.578865 0.237009
## 3 0.256898 0.105184
## 4 0.144827 0.059298
## 5 0.122252 0.050054
##
## ---
## variable relative_importance
## 45 % Change - Private Nonfarm Employment 1026.374756
## 46 Retail Sales - 2007 1000.179504
## 47 Private Nonfarm Establishments 2013 929.477112
## 48 Total Number of Firms 916.109863
## 49 Housing Units 2014 684.774475
## 50 Nonemployer Establishments - 2013 186.480164
## scaled_importance percentage
## 45 0.001859 0.000761
## 46 0.001811 0.000742
## 47 0.001683 0.000689
## 48 0.001659 0.000679
## 49 0.001240 0.000508
## 50 0.000338 0.000138
# Show the first 20 rows of the variable-importance table, which is printed
# in descending order of relative importance.
h2o.varimp(Trump_features)[seq_len(20), ]
## Variable Importances:
## variable relative_importance
## 1 Obama 552152.312500
## 2 Romney 319621.687500
## 3 Edu_batchelors 141846.843750
## 4 Housing Units in Multi-Unit Structures 79966.773438
## 5 Foreign Born 67501.562500
## 6 Median Value of Owner-Occupied Housing Units 57713.132812
## 7 NonEnglish 29647.656250
## 8 Density 7088.221680
## 9 Protestant 4866.725098
## 10 Women 4536.835449
## 11 Homeownership Rate 4282.443848
## 12 Hispanic-Owned Firms 4106.511719
## 13 Hispanic 3964.405518
## 14 Poverty 3705.902588
## 15 Persons/Household 3631.931641
## 16 Total_Pop 3313.454346
## 17 Accommodation and Food Service Sales - 2007 3250.160400
## 18 Black-Owned Firms 3177.686279
## 19 population_change 3162.727051
## 20 population2010 3098.852783
## scaled_importance percentage
## 1 1.000000 0.409437
## 2 0.578865 0.237009
## 3 0.256898 0.105184
## 4 0.144827 0.059298
## 5 0.122252 0.050054
## 6 0.104524 0.042796
## 7 0.053695 0.021985
## 8 0.012837 0.005256
## 9 0.008814 0.003609
## 10 0.008217 0.003364
## 11 0.007756 0.003176
## 12 0.007437 0.003045
## 13 0.007180 0.002940
## 14 0.006712 0.002748
## 15 0.006578 0.002693
## 16 0.006001 0.002457
## 17 0.005886 0.002410
## 18 0.005755 0.002356
## 19 0.005728 0.002345
## 20 0.005612 0.002298
# Plot the variable importances of the blue-state Trump model, limited to
# 20 features.
h2o.varimp_plot(model = Trump_features, num_of_features = 20)
# Upload the blue-state shift feature table to the H2O cluster.
blue.Swing = as.h2o(x = Blue_Features_Shift)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest feature ranking on the blue.Swing frame.
# Column 50 is the response; columns 1-49 and 51 are the predictors.
vars.Swing = colnames(blue.Swing)
y_var.Swing = vars.Swing[50]
x_vars.Swing = vars.Swing[c(1:49, 51)]
# stopping_rounds = 2 together with score_each_iteration = TRUE turns on early
# stopping, so the forest may end up with far fewer than ntrees trees.
Swing_features = h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = blue.Swing,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
##
|
| | 0%
|
|==================================== | 56%
|
|=================================================================| 100%
# Print the full report for Swing_features: model summary, training and
# cross-validation metrics, scoring history and variable importances.
summary(Swing_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_21
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 10 10 34595 15
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 16.60000 260 286 270.00000
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 33.22862
## RMSE: 5.764427
## MAE: 4.465677
## RMSLE: NaN
## Mean Residual Deviance : 33.22862
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 30.54431
## RMSE: 5.526691
## MAE: 4.336278
## RMSLE: NaN
## Mean Residual Deviance : 30.54431
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 4.3627768 0.32940245 3.6924987 4.488959 4.295938
## mse 31.024128 5.0281153 21.930616 31.969517 31.357307
## r2 0.7050931 0.07328907 0.761572 0.5464774 0.752726
## residual_deviance 31.024128 5.0281153 21.930616 31.969517 31.357307
## rmse 5.5322576 0.45730403 4.6830134 5.654159 5.5997596
## rmsle 0.0 NaN NaN NaN NaN
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 5.4299264 3.6811585 4.252129 4.5538936 4.5449796
## mse 45.35922 18.635963 31.33096 37.94935 32.63026
## r2 0.6590757 0.81730515 0.76446944 0.48357728 0.76171446
## residual_deviance 45.35922 18.635963 31.33096 37.94935 32.63026
## rmse 6.7349253 4.316939 5.5974064 6.160304 5.7122903
## rmsle NaN NaN NaN NaN NaN
## cv_9_valid cv_10_valid
## mae 4.4289503 4.259334
## mse 31.531193 27.546896
## r2 0.7212148 0.7827988
## residual_deviance 31.531193 27.546896
## rmse 5.6152644 5.2485137
## rmsle NaN NaN
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:26 1.181 sec 0
## 2 2017-07-04 13:51:26 1.198 sec 1 7.88609
## 3 2017-07-04 13:51:26 1.215 sec 2 7.86147
## 4 2017-07-04 13:51:27 1.231 sec 3 7.21911
## 5 2017-07-04 13:51:27 1.245 sec 4 6.99703
## 6 2017-07-04 13:51:27 1.258 sec 5 6.70211
## 7 2017-07-04 13:51:27 1.273 sec 6 6.46323
## 8 2017-07-04 13:51:27 1.287 sec 7 6.25721
## 9 2017-07-04 13:51:27 1.302 sec 8 6.04191
## 10 2017-07-04 13:51:27 1.315 sec 9 5.85544
## 11 2017-07-04 13:51:27 1.328 sec 10 5.76443
## training_mae training_deviance
## 1
## 2 6.14891 62.19049
## 3 6.14893 61.80270
## 4 5.77634 52.11562
## 5 5.50970 48.95839
## 6 5.25840 44.91835
## 7 5.07395 41.77339
## 8 4.89873 39.15264
## 9 4.68931 36.50465
## 10 4.52406 34.28619
## 11 4.46568 33.22862
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 population_change 93590.648438
## 2 Foreign Born 56283.562500
## 3 Median Value of Owner-Occupied Housing Units 41131.621094
## 4 Edu_batchelors 33128.046875
## 5 Hispanic 29202.830078
## scaled_importance percentage
## 1 1.000000 0.254601
## 2 0.601380 0.153112
## 3 0.439484 0.111893
## 4 0.353967 0.090121
## 5 0.312027 0.079442
##
## ---
## variable relative_importance
## 45 Households 566.473389
## 46 Nonemployer Establishments - 2013 559.845215
## 47 Private Nonfarm Establishments 2013 510.509491
## 48 population2010 502.221375
## 49 Veterans 362.028351
## 50 Retail Sales - 2007 284.668762
## scaled_importance percentage
## 45 0.006053 0.001541
## 46 0.005982 0.001523
## 47 0.005455 0.001389
## 48 0.005366 0.001366
## 49 0.003868 0.000985
## 50 0.003042 0.000774
# Show rows 1-20 of the Swing_features variable-importance table.
h2o.varimp(Swing_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 population_change 93590.648438
## 2 Foreign Born 56283.562500
## 3 Median Value of Owner-Occupied Housing Units 41131.621094
## 4 Edu_batchelors 33128.046875
## 5 Hispanic 29202.830078
## 6 Edu_highschool 8948.615234
## 7 NonEnglish 8292.908203
## 8 Obama 6432.069336
## 9 Income 6342.848145
## 10 Travel Time to Work 6284.697754
## 11 age65plus 6031.405762
## 12 population2014 4929.368652
## 13 Evangelical 4168.486816
## 14 Romney 3996.504150
## 15 Land Area (in sq miles) 3870.975586
## 16 White 3538.895508
## 17 Poverty 3165.208984
## 18 % Female 2014 3091.719482
## 19 Living in Same House 1+ Years 2697.395752
## 20 Manufacturers Shipments - 2007 2520.426270
## scaled_importance percentage
## 1 1.000000 0.254601
## 2 0.601380 0.153112
## 3 0.439484 0.111893
## 4 0.353967 0.090121
## 5 0.312027 0.079442
## 6 0.095614 0.024344
## 7 0.088608 0.022560
## 8 0.068726 0.017498
## 9 0.067772 0.017255
## 10 0.067151 0.017097
## 11 0.064445 0.016408
## 12 0.052669 0.013410
## 13 0.044540 0.011340
## 14 0.042702 0.010872
## 15 0.041361 0.010530
## 16 0.037812 0.009627
## 17 0.033820 0.008611
## 18 0.033034 0.008411
## 19 0.028821 0.007338
## 20 0.026930 0.006856
# Plot the variable importances of Swing_features, limited to 20 features.
h2o.varimp_plot(Swing_features, num_of_features = 20)
# Convert Swing_Features_clinton into an H2O frame for the next model.
# NOTE(review): "Swing" presumably means swing counties -- confirm against
# where Swing_Features_clinton is built.
swing.Clinton = as.h2o(Swing_Features_clinton)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest feature ranking on the swing.Clinton frame.
# Column 1 is the response; columns 2-51 are the predictors.
vars.Clinton = colnames(swing.Clinton)
y_var.Clinton = vars.Clinton[1]
x_vars.Clinton = vars.Clinton[2:51]
# stopping_rounds = 2 together with score_each_iteration = TRUE turns on early
# stopping, so the forest may end up with far fewer than ntrees trees.
Clinton_features = h2o.randomForest(
  x = x_vars.Clinton,
  y = y_var.Clinton,
  training_frame = swing.Clinton,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
##
|
| | 0%
|
|===================================================== | 82%
|
|=================================================================| 100%
# Print the full report for Clinton_features: model summary, training and
# cross-validation metrics, scoring history and variable importances.
summary(Clinton_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_22
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 12 12 87721 17
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 18.41667 557 593 576.66670
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 11.16537
## RMSE: 3.341463
## MAE: 2.459176
## RMSLE: 0.0896343
## Mean Residual Deviance : 11.16537
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 8.357622
## RMSE: 2.890955
## MAE: 2.17877
## RMSLE: 0.07898867
## Mean Residual Deviance : 8.357622
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid
## mae 2.1764514 0.15296449 2.4029014 2.1700869
## mse 8.350991 1.6560885 10.382624 7.5849447
## r2 0.9486032 0.0076983566 0.9301555 0.9521475
## residual_deviance 8.350991 1.6560885 10.382624 7.5849447
## rmse 2.8631918 0.2766986 3.2222078 2.754078
## rmsle 0.07846324 0.0067237546 0.085754074 0.06702775
## cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae 2.05005 2.1309838 2.2798448 1.8299347 2.3393886
## mse 7.0528684 7.709677 7.804712 5.3534007 13.17103
## r2 0.9482558 0.9543957 0.9499299 0.9694844 0.9446068
## residual_deviance 7.0528684 7.709677 7.804712 5.3534007 13.17103
## rmse 2.6557238 2.7766306 2.7936914 2.3137417 3.6291914
## rmsle 0.0769472 0.10130545 0.07168021 0.073419444 0.08333611
## cv_8_valid cv_9_valid cv_10_valid
## mae 2.587893 1.9857495 1.9876814
## mse 11.32123 6.9654436 6.163981
## r2 0.9331973 0.94539094 0.9584686
## residual_deviance 11.32123 6.9654436 6.163981
## rmse 3.3647037 2.6392126 2.4827366
## rmsle 0.081611395 0.072677046 0.07087373
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:30 1.965 sec 0
## 2 2017-07-04 13:51:30 1.985 sec 1 4.76030
## 3 2017-07-04 13:51:30 2.007 sec 2 4.36691
## 4 2017-07-04 13:51:30 2.027 sec 3 4.26996
## 5 2017-07-04 13:51:30 2.049 sec 4 4.08651
## 6 2017-07-04 13:51:30 2.071 sec 5 4.04406
## 7 2017-07-04 13:51:30 2.093 sec 6 3.96838
## 8 2017-07-04 13:51:30 2.114 sec 7 3.72528
## 9 2017-07-04 13:51:30 2.137 sec 8 3.62964
## 10 2017-07-04 13:51:30 2.159 sec 9 3.58004
## 11 2017-07-04 13:51:30 2.182 sec 10 3.57654
## 12 2017-07-04 13:51:30 2.204 sec 11 3.42839
## 13 2017-07-04 13:51:30 2.226 sec 12 3.34146
## training_mae training_deviance
## 1
## 2 3.45215 22.66043
## 3 3.26566 19.06988
## 4 3.19231 18.23253
## 5 3.08619 16.69955
## 6 2.99280 16.35440
## 7 2.91640 15.74806
## 8 2.75213 13.87770
## 9 2.65912 13.17431
## 10 2.60208 12.81665
## 11 2.59165 12.79160
## 12 2.51838 11.75382
## 13 2.45918 11.16537
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Obama 777059.187500
## 2 Romney 204742.046875
## 3 Housing Units in Multi-Unit Structures 161199.359375
## 4 White 70074.117188
## 5 Edu_batchelors 43863.246094
## scaled_importance percentage
## 1 1.000000 0.540407
## 2 0.263483 0.142388
## 3 0.207448 0.112106
## 4 0.090179 0.048733
## 5 0.056448 0.030505
##
## ---
## variable relative_importance scaled_importance
## 45 Retail Sales - 2007 792.955688 0.001020
## 46 Total Number of Firms 719.299255 0.000926
## 47 population2014 643.103516 0.000828
## 48 Merchant Wholesaler Sales - 2007 429.022583 0.000552
## 49 Households 368.605408 0.000474
## 50 Hispanic-Owned Firms 162.402924 0.000209
## percentage
## 45 0.000551
## 46 0.000500
## 47 0.000447
## 48 0.000298
## 49 0.000256
## 50 0.000113
# Show rows 1-20 of the Clinton_features variable-importance table.
h2o.varimp(Clinton_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Obama 777059.187500
## 2 Romney 204742.046875
## 3 Housing Units in Multi-Unit Structures 161199.359375
## 4 White 70074.117188
## 5 Edu_batchelors 43863.246094
## 6 Private Nonfarm Employment 29157.656250
## 7 Black 22736.304688
## 8 Density 12924.420898
## 9 Median Value of Owner-Occupied Housing Units 11243.088867
## 10 Foreign Born 11206.737305
## 11 population_change 6721.944824
## 12 Black-Owned Firms 6536.583496
## 13 Homeownership Rate 4504.656250
## 14 Edu_highschool 4246.471680
## 15 Women 3736.649414
## 16 Building Permits 3633.344238
## 17 Veterans 3322.478760
## 18 NonEnglish 3133.468506
## 19 Accommodation and Food Service Sales - 2007 3039.930908
## 20 Living in Same House 1+ Years 3037.948242
## scaled_importance percentage
## 1 1.000000 0.540407
## 2 0.263483 0.142388
## 3 0.207448 0.112106
## 4 0.090179 0.048733
## 5 0.056448 0.030505
## 6 0.037523 0.020278
## 7 0.029259 0.015812
## 8 0.016632 0.008988
## 9 0.014469 0.007819
## 10 0.014422 0.007794
## 11 0.008650 0.004675
## 12 0.008412 0.004546
## 13 0.005797 0.003133
## 14 0.005465 0.002953
## 15 0.004809 0.002599
## 16 0.004676 0.002527
## 17 0.004276 0.002311
## 18 0.004032 0.002179
## 19 0.003912 0.002114
## 20 0.003910 0.002113
# Plot the variable importances of Clinton_features, limited to 20 features.
h2o.varimp_plot(Clinton_features, num_of_features = 20)
# Convert Swing_Features_Trump into an H2O frame for the next model.
swing.Trump = as.h2o(Swing_Features_Trump)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest feature ranking on the swing.Trump frame.
# Column 1 is the response; columns 2-51 are the predictors.
# This overwrites any Trump_features model fitted earlier in the script.
vars.Trump = colnames(swing.Trump)
y_var.Trump = vars.Trump[1]
x_vars.Trump = vars.Trump[2:51]
# stopping_rounds = 2 together with score_each_iteration = TRUE turns on early
# stopping, so the forest may end up with far fewer than ntrees trees.
Trump_features = h2o.randomForest(
  x = x_vars.Trump,
  y = y_var.Trump,
  training_frame = swing.Trump,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
##
|
| | 0%
|
|===================================================== | 82%
|
|=================================================================| 100%
# Print the full report for Trump_features: model summary, training and
# cross-validation metrics, scoring history and variable importances.
summary(Trump_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_23
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 11 11 80086 16
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 18.45455 551 589 574.18180
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 12.14839
## RMSE: 3.485455
## MAE: 2.628891
## RMSLE: 0.08165229
## Mean Residual Deviance : 12.14839
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 8.166665
## RMSE: 2.857738
## MAE: 2.152723
## RMSLE: 0.06808743
## Mean Residual Deviance : 8.166665
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid
## mae 2.1566992 0.15428063 2.2658913 2.2943878
## mse 8.182738 1.2559689 8.401714 7.5393467
## r2 0.9511253 0.0067784255 0.9464498 0.9583296
## residual_deviance 8.182738 1.2559689 8.401714 7.5393467
## rmse 2.8428943 0.2243769 2.898571 2.7457871
## rmsle 0.064570345 0.015478144 0.06336895 0.05494856
## cv_3_valid cv_4_valid cv_5_valid cv_6_valid cv_7_valid
## mae 2.1010022 2.1584582 2.1456854 1.6351633 2.353568
## mse 8.701764 8.695951 7.391761 4.7056117 11.743283
## r2 0.9384737 0.94882405 0.95611984 0.97479427 0.9508477
## residual_deviance 8.701764 8.695951 7.391761 4.7056117 11.743283
## rmse 2.9498754 2.94889 2.7187793 2.1692421 3.4268475
## rmsle 0.06087396 0.04998338 0.06637724 0.03820809 0.121566564
## cv_8_valid cv_9_valid cv_10_valid
## mae 2.34893 1.9163048 2.3476014
## mse 9.737516 6.5373297 8.373104
## r2 0.94366795 0.9480574 0.9456889
## residual_deviance 9.737516 6.5373297 8.373104
## rmse 3.1204994 2.5568202 2.8936317
## rmsle 0.082081355 0.053765394 0.05452997
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:34 1.788 sec 0
## 2 2017-07-04 13:51:34 1.813 sec 1 5.08287
## 3 2017-07-04 13:51:34 1.834 sec 2 4.69558
## 4 2017-07-04 13:51:34 1.854 sec 3 4.70470
## 5 2017-07-04 13:51:34 1.875 sec 4 4.34390
## 6 2017-07-04 13:51:34 1.896 sec 5 4.18901
## 7 2017-07-04 13:51:34 1.917 sec 6 3.93378
## 8 2017-07-04 13:51:34 1.939 sec 7 3.81017
## 9 2017-07-04 13:51:34 1.959 sec 8 3.68352
## 10 2017-07-04 13:51:34 1.979 sec 9 3.65731
## 11 2017-07-04 13:51:34 2.002 sec 10 3.60603
## 12 2017-07-04 13:51:34 2.023 sec 11 3.48545
## training_mae training_deviance
## 1
## 2 3.66599 25.83555
## 3 3.43913 22.04844
## 4 3.43246 22.13416
## 5 3.27439 18.86943
## 6 3.13936 17.54784
## 7 2.96050 15.47461
## 8 2.87135 14.51737
## 9 2.79873 13.56832
## 10 2.78048 13.37590
## 11 2.73468 13.00348
## 12 2.62889 12.14839
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Obama 791054.000000
## 2 Housing Units in Multi-Unit Structures 166076.937500
## 3 Romney 134347.890625
## 4 Edu_batchelors 75131.203125
## 5 Private Nonfarm Employment 29824.814453
## scaled_importance percentage
## 1 1.000000 0.575958
## 2 0.209944 0.120919
## 3 0.169834 0.097817
## 4 0.094976 0.054702
## 5 0.037703 0.021715
##
## ---
## variable relative_importance scaled_importance
## 45 Mormon 669.428040 0.000846
## 46 population2014 503.831940 0.000637
## 47 Total Number of Firms 466.595093 0.000590
## 48 Housing Units 2014 452.721649 0.000572
## 49 Households 451.557220 0.000571
## 50 Merchant Wholesaler Sales - 2007 347.838165 0.000440
## percentage
## 45 0.000487
## 46 0.000367
## 47 0.000340
## 48 0.000330
## 49 0.000329
## 50 0.000253
# Show rows 1-20 of the Trump_features variable-importance table.
h2o.varimp(Trump_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Obama 791054.000000
## 2 Housing Units in Multi-Unit Structures 166076.937500
## 3 Romney 134347.890625
## 4 Edu_batchelors 75131.203125
## 5 Private Nonfarm Employment 29824.814453
## 6 Black 20885.173828
## 7 Median Value of Owner-Occupied Housing Units 17673.820312
## 8 White 14739.574219
## 9 Density 14615.491211
## 10 Foreign Born 12562.568359
## 11 population_change 11680.703125
## 12 Hispanic 8312.496094
## 13 Income 4991.144531
## 14 age65plus 4984.941895
## 15 Living in Same House 1+ Years 4732.471680
## 16 NonEnglish 4547.553711
## 17 Edu_highschool 4121.343750
## 18 Homeownership Rate 3489.776367
## 19 Protestant 3357.005615
## 20 Veterans 3319.221924
## scaled_importance percentage
## 1 1.000000 0.575958
## 2 0.209944 0.120919
## 3 0.169834 0.097817
## 4 0.094976 0.054702
## 5 0.037703 0.021715
## 6 0.026402 0.015206
## 7 0.022342 0.012868
## 8 0.018633 0.010732
## 9 0.018476 0.010641
## 10 0.015881 0.009147
## 11 0.014766 0.008505
## 12 0.010508 0.006052
## 13 0.006309 0.003634
## 14 0.006302 0.003629
## 15 0.005982 0.003446
## 16 0.005749 0.003311
## 17 0.005210 0.003001
## 18 0.004412 0.002541
## 19 0.004244 0.002444
## 20 0.004196 0.002417
# Plot the variable importances of Trump_features, limited to 20 features.
h2o.varimp_plot(Trump_features, num_of_features = 20)
# Convert Swing_Features_Shift into an H2O frame for the next model.
swing.Swing = as.h2o(Swing_Features_Shift)
##
|
| | 0%
|
|=================================================================| 100%
# Random-forest feature ranking on the swing.Swing frame.
# Column 50 is the response; columns 1-49 and 51 are the predictors.
# This overwrites the vars.Swing / x_vars.Swing / y_var.Swing / Swing_features
# objects created for the blue.Swing frame earlier in the script.
vars.Swing = colnames(swing.Swing)
y_var.Swing = vars.Swing[50]
x_vars.Swing = vars.Swing[c(1:49, 51)]
# stopping_rounds = 2 together with score_each_iteration = TRUE turns on early
# stopping, so the forest may end up with far fewer than ntrees trees.
Swing_features = h2o.randomForest(
  x = x_vars.Swing,
  y = y_var.Swing,
  training_frame = swing.Swing,
  ntrees = 200,
  nfolds = 10,
  seed = 123,
  stopping_rounds = 2,
  score_each_iteration = TRUE
)
##
|
| | 0%
|
|================================================ | 74%
|
|=================================================================| 100%
# Print the full report for Swing_features: model summary, training and
# cross-validation metrics, scoring history and variable importances.
summary(Swing_features)
## Model Details:
## ==============
##
## H2ORegressionModel: drf
## Model Key: DRF_model_R_1499186394342_24
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 13 13 95195 19
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 20 19.92308 551 593 577.46155
##
## H2ORegressionMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 26.23519
## RMSE: 5.12203
## MAE: 3.838263
## RMSLE: NaN
## Mean Residual Deviance : 26.23519
##
##
##
## H2ORegressionMetrics: drf
## ** Reported on cross-validation data. **
## ** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 20.73719
## RMSE: 4.553811
## MAE: 3.498301
## RMSLE: NaN
## Mean Residual Deviance : 20.73719
##
##
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## mae 3.4971883 0.25601858 3.771691 3.336542 3.5872598
## mse 20.68097 2.9543352 23.686274 17.17209 23.960949
## r2 0.81846476 0.023982769 0.80416083 0.83921343 0.7725684
## residual_deviance 20.68097 2.9543352 23.686274 17.17209 23.960949
## rmse 4.5206423 0.34982938 4.8668547 4.1439223 4.8949924
## rmsle 0.0 NaN NaN NaN NaN
## cv_4_valid cv_5_valid cv_6_valid cv_7_valid cv_8_valid
## mae 3.6994224 3.6790047 2.567361 3.6218374 3.9002304
## mse 21.105486 20.328083 10.849713 23.64021 26.548128
## r2 0.7882603 0.86201805 0.8884333 0.7895218 0.8213563
## residual_deviance 21.105486 20.328083 10.849713 23.64021 26.548128
## rmse 4.594071 4.5086675 3.2938902 4.8621197 5.1524878
## rmsle NaN NaN NaN NaN NaN
## cv_9_valid cv_10_valid
## mae 3.2200727 3.5884612
## mse 19.201723 20.317043
## r2 0.81467414 0.80444115
## residual_deviance 19.201723 20.317043
## rmse 4.381977 4.507443
## rmsle NaN NaN
##
## Scoring History:
## timestamp duration number_of_trees training_rmse
## 1 2017-07-04 13:51:38 2.326 sec 0
## 2 2017-07-04 13:51:38 2.351 sec 1 7.20856
## 3 2017-07-04 13:51:38 2.379 sec 2 7.52751
## 4 2017-07-04 13:51:38 2.401 sec 3 7.05333
## 5 2017-07-04 13:51:38 2.421 sec 4 6.82414
## 6 2017-07-04 13:51:38 2.443 sec 5 6.52407
## 7 2017-07-04 13:51:38 2.464 sec 6 6.21454
## 8 2017-07-04 13:51:38 2.486 sec 7 5.96204
## 9 2017-07-04 13:51:38 2.507 sec 8 5.72039
## 10 2017-07-04 13:51:38 2.528 sec 9 5.45401
## 11 2017-07-04 13:51:38 2.550 sec 10 5.35521
## 12 2017-07-04 13:51:38 2.573 sec 11 5.23388
## 13 2017-07-04 13:51:38 2.593 sec 12 5.20277
## 14 2017-07-04 13:51:38 2.614 sec 13 5.12203
## training_mae training_deviance
## 1
## 2 5.51825 51.96337
## 3 5.70884 56.66345
## 4 5.33133 49.74952
## 5 5.10411 46.56883
## 6 4.94928 42.56346
## 7 4.67275 38.62053
## 8 4.54714 35.54592
## 9 4.30118 32.72291
## 10 4.14045 29.74626
## 11 4.06707 28.67831
## 12 3.98612 27.39346
## 13 3.91858 27.06877
## 14 3.83826 26.23519
##
## Variable Importances: (Extract with `h2o.varimp`)
## =================================================
##
## Variable Importances:
## variable relative_importance
## 1 Edu_batchelors 289576.156250
## 2 population_change 197768.937500
## 3 Foreign Born 78123.242188
## 4 Median Value of Owner-Occupied Housing Units 60032.285156
## 5 White 50424.574219
## scaled_importance percentage
## 1 1.000000 0.274311
## 2 0.682960 0.187344
## 3 0.269785 0.074005
## 4 0.207311 0.056868
## 5 0.174132 0.047766
##
## ---
## variable relative_importance scaled_importance
## 45 Merchant Wholesaler Sales - 2007 1664.167480 0.005747
## 46 Housing Units 2014 1423.829590 0.004917
## 47 Black-Owned Firms 1282.661987 0.004429
## 48 Households 959.471252 0.003313
## 49 population2010 740.479126 0.002557
## 50 population2014 713.275024 0.002463
## percentage
## 45 0.001576
## 46 0.001349
## 47 0.001215
## 48 0.000909
## 49 0.000701
## 50 0.000676
# Show rows 1-20 of the Swing_features variable-importance table.
h2o.varimp(Swing_features)[1:20,]
## Variable Importances:
## variable relative_importance
## 1 Edu_batchelors 289576.156250
## 2 population_change 197768.937500
## 3 Foreign Born 78123.242188
## 4 Median Value of Owner-Occupied Housing Units 60032.285156
## 5 White 50424.574219
## 6 Black 45287.859375
## 7 Density 33644.671875
## 8 Romney 31501.941406
## 9 Obama 30076.775391
## 10 Hispanic 19566.589844
## 11 Evangelical 15555.184570
## 12 Edu_highschool 13553.864258
## 13 Income 12127.321289
## 14 Catholic 11660.565430
## 15 NonEnglish 10811.342773
## 16 % Female 2014 10205.845703
## 17 Total_Pop 7789.696777
## 18 Poverty 7647.796387
## 19 Private Nonfarm Establishments 2013 7351.964844
## 20 Homeownership Rate 7218.028320
## scaled_importance percentage
## 1 1.000000 0.274311
## 2 0.682960 0.187344
## 3 0.269785 0.074005
## 4 0.207311 0.056868
## 5 0.174132 0.047766
## 6 0.156394 0.042901
## 7 0.116186 0.031871
## 8 0.108786 0.029841
## 9 0.103865 0.028491
## 10 0.067570 0.018535
## 11 0.053717 0.014735
## 12 0.046806 0.012839
## 13 0.041880 0.011488
## 14 0.040268 0.011046
## 15 0.037335 0.010241
## 16 0.035244 0.009668
## 17 0.026900 0.007379
## 18 0.026410 0.007245
## 19 0.025389 0.006964
## 20 0.024926 0.006838
# Plot the variable importances of Swing_features, limited to 20 features.
h2o.varimp_plot(Swing_features, num_of_features = 20)
# Adjusted R-squared of nested linear models for per_shift, adding one
# predictor at a time. Predictors are chosen by column position in
# Swing_Features_Shift, in the order 17, 5, 14, 23, 10 (column 17 is the one
# the cat() call below labels Edu_batchelors).
# NOTE(review): these positions presumably follow the top-5 ranking from
# Swing_features -- confirm if the column layout ever changes.
theswing = local({
  feature_names = names(Swing_Features_Shift)[c(17, 5, 14, 23, 10)]
  adj_r2 = vapply(seq_along(feature_names), function(k) {
    # Response plus the first k predictors; lm() drops incomplete rows itself.
    model_data = Swing_Features_Shift[, c("per_shift", feature_names[seq_len(k)]),
                                      drop = FALSE]
    summary(lm(per_shift ~ ., data = model_data))$adj.r.squared
  }, numeric(1))
  # The leading 0 anchors the curve at "no features".
  c(0, adj_r2)
})
Swing_1 = theswing[2]
Swing_2 = theswing[3]
Swing_3 = theswing[4]
Swing_4 = theswing[5]
Swing_5 = theswing[6]
num = seq_along(theswing) - 1
# The plotted values are adjusted R-squared, shown under the label "Accuracy".
plot(x = num,
     y = theswing,
     type = "b",
     col = "blue",
     main = "Accuracy for Features Selected",
     xlab = "Number of features",
     ylab = "Accuracy")
cat("Swing - Edu_batchelors:", Swing_1)
## Swing - Edu_batchelors: 0.463958
# Fit a linear model of per_shift on a hand-picked set of columns from
# Swing_Features_Shift plus one interaction term, using complete rows only.
subset_cols <- c(5, 8, 10, 12, 17, 22, 23, 26, 42, 46, 50, 51)
my_subset <- na.omit(Swing_Features_Shift[, subset_cols])

# NOTE(review): despite its name, IncomeXPopChange is the product of
# population_change and Edu_batchelors (Income is not involved) -- confirm
# which interaction was intended before renaming or changing it.
my_subset[["IncomeXPopChange"]] <-
  my_subset[["population_change"]] * my_subset[["Edu_batchelors"]]

my_model <- lm(per_shift ~ ., data = my_subset)
summary(my_model)
##
## Call:
## lm(formula = per_shift ~ ., data = my_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.2337 -3.4398 -0.1666 3.4990 18.5681
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.113e+01 1.932e+00
## population_change -7.623e-01 1.190e-01
## age65plus -4.233e-02 4.514e-02
## White 2.157e-01 1.650e-02
## Hispanic -2.175e-01 2.251e-02
## Edu_batchelors -7.473e-01 4.418e-02
## `Housing Units in Multi-Unit Structures` -5.966e-03 2.823e-02
## `Median Value of Owner-Occupied Housing Units` -2.250e-05 5.613e-06
## Income 2.239e-04 7.200e-05
## `Building Permits` -4.364e-04 1.930e-04
## Evangelical -2.693e-01 2.443e-02
## Christian 9.522e-02 1.417e-02
## IncomeXPopChange 2.046e-02 4.347e-03
## t value Pr(>|t|)
## (Intercept) 5.759 1.16e-08 ***
## population_change -6.408 2.37e-10 ***
## age65plus -0.938 0.34867
## White 13.071 < 2e-16 ***
## Hispanic -9.662 < 2e-16 ***
## Edu_batchelors -16.916 < 2e-16 ***
## `Housing Units in Multi-Unit Structures` -0.211 0.83269
## `Median Value of Owner-Occupied Housing Units` -4.008 6.63e-05 ***
## Income 3.109 0.00194 **
## `Building Permits` -2.261 0.02400 *
## Evangelical -11.024 < 2e-16 ***
## Christian 6.722 3.17e-11 ***
## IncomeXPopChange 4.707 2.90e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.343 on 903 degrees of freedom
## Multiple R-squared: 0.7577, Adjusted R-squared: 0.7544
## F-statistic: 235.3 on 12 and 903 DF, p-value: < 2.2e-16
# County-level margin for each election: the absolute difference between the
# Trump and Clinton columns (2016) and between the Obama and Romney columns
# (2012). polarization is the 2016 margin minus the 2012 margin, so a positive
# value means the margin was wider in 2016.
votes[["margin_2016"]] <- abs(votes[["Trump"]] - votes[["Clinton"]])
votes[["margin_2012"]] <- abs(votes[["Obama"]] - votes[["Romney"]])
votes[["polarization"]] <- votes[["margin_2016"]] - votes[["margin_2012"]]
# Overlay the kernel density estimates of the county margins: 2016 in red,
# 2012 in blue, on shared axes.
plot(
  density(votes$margin_2016),
  main = "County Winner Density Plot",
  ylab = "density",
  xlab = "county margin",
  ylim = c(0, .020),
  xlim = c(0, 100),
  col = "red"
)
# Add the 2012 curve to the existing plot instead of stacking a second full
# plot() via par(new = ...), which would redraw axes, title and labels.
lines(density(votes$margin_2012), col = "blue")
legend("topright", legend = c("2012", "2016"), fill = c("blue", "red"))
# ---- County polarization maps ----
# Seven county choropleths that share the same zoom, state outline, legend
# title and legend placement; only the data, the title and the fill colours
# differ, so the common recipe lives in the helpers below.

# The 48 states every map in this section is zoomed to.
polarization_zoom_states <- c(
  "maine", "new hampshire", "vermont", "massachusetts", "connecticut",
  "rhode island", "new york", "pennsylvania", "new jersey", "maryland",
  "delaware", "west virginia", "virginia", "tennessee", "kentucky",
  "north carolina", "south carolina", "georgia", "florida", "arkansas",
  "mississippi", "alabama", "louisiana", "ohio", "michigan", "indiana",
  "illinois", "wisconsin", "minnesota", "iowa", "missouri", "north dakota",
  "south dakota", "nebraska", "kansas", "texas", "oklahoma", "new mexico",
  "arizona", "colorado", "wyoming", "montana", "idaho", "utah", "nevada",
  "california", "oregon", "washington"
)

# Build the region/value data frame for one map.
#   df:        data frame whose first column is the region key (the same
#              column the maps have always used) and which contains value_col
#   value_col: name of the numeric column to bin
#   breaks:    cut points passed to cut()
#   labels:    one label per resulting bin
# Returns a data frame with columns `region` and `value` (a factor).
# include.lowest = TRUE keeps a value equal to the lowest break (e.g. an
# exact 0 margin) in the first bin instead of turning it into NA.
binned_map_data <- function(df, value_col, breaks, labels) {
  out <- df[, c(names(df)[1], value_col)]
  names(out) <- c("region", "value")
  out$value <- cut(
    out$value,
    breaks = breaks,
    labels = labels,
    include.lowest = TRUE
  )
  out
}

# Render one county choropleth.
#   map_data:    data frame with `region` and `value` columns
#   title:       plot title
#   fill_colors: one fill colour per level of `value`, in level order
# Returns the rendered map with the legend placed on the right.
render_polarization_map <- function(map_data, title, fill_colors) {
  choropleth <- CountyChoropleth$new(map_data)
  choropleth$title <- title
  choropleth$add_state_outline <- TRUE
  choropleth$set_num_colors(length(fill_colors))
  choropleth$ggplot_scale <- scale_fill_manual(values = fill_colors)
  choropleth$legend <- "Polarization Scale"
  choropleth$set_zoom(polarization_zoom_states)
  choropleth$render() + theme(legend.position = "right")
}

# Bin definitions shared by the maps below.
margin_breaks_4 <- c(0, 10, 20, 50, Inf)
margin_labels_4 <- c(
  "0%-10% Margin", "10%-20% Margin", "20%-50% Margin", "50%-100% Margin"
)
margin_colors_4 <- c("darkblue", "lightblue", "red", "darkred")
margin_colors_2 <- c("darkblue", "white")

# Direction of the margin change between 2012 and 2016.
polarization <- binned_map_data(
  votes, "polarization",
  breaks = c(-Inf, 0, Inf),
  labels = c("Less Polarized", "More Polarized")
)
county_polarization <- render_polarization_map(
  polarization, "Polarization Shift Measurement", c("darkblue", "red")
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization

# 2016 margin in four bins.
polarization2016 <- binned_map_data(
  votes, "margin_2016", margin_breaks_4, margin_labels_4
)
county_polarization2016 <- render_polarization_map(
  polarization2016, "Polarization Measurement - 2016", margin_colors_4
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization2016

# 2012 margin in four bins.
polarization2012 <- binned_map_data(
  votes, "margin_2012", margin_breaks_4, margin_labels_4
)
county_polarization2012 <- render_polarization_map(
  polarization2012, "Polarization Measurement - 2012", margin_colors_4
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization2012

# 2016 margin split at 10.
polarization10 <- binned_map_data(
  votes, "margin_2016",
  breaks = c(0, 10, Inf),
  labels = c("<10% Margin", ">10% Margin")
)
county_polarization10 <- render_polarization_map(
  polarization10, "Polarization @ 10% Margin - 2016", margin_colors_2
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization10

# 2012 margin split at 10.
polarization102 <- binned_map_data(
  votes, "margin_2012",
  breaks = c(0, 10, Inf),
  labels = c("<10% Margin", ">10% Margin")
)
county_polarization102 <- render_polarization_map(
  polarization102, "Polarization @ 10% Margin - 2012", margin_colors_2
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization102

# 2016 margin split at 20.
polarization20 <- binned_map_data(
  votes, "margin_2016",
  breaks = c(0, 20, Inf),
  labels = c("<20% Margin", ">20% Margin")
)
county_polarization20 <- render_polarization_map(
  polarization20, "Polarization @ 20% Margin - 2016", margin_colors_2
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization20

# 2012 margin split at 20.
polarization202 <- binned_map_data(
  votes, "margin_2012",
  breaks = c(0, 20, Inf),
  labels = c("<20% Margin", ">20% Margin")
)
county_polarization202 <- render_polarization_map(
  polarization202, "Polarization @ 20% Margin - 2012", margin_colors_2
)
## Warning in self$bind(): The following regions were missing and are being
## set to NA: 51515
county_polarization202
# Percentage of rows in `map_data` whose `value` equals `label`.
# Rows with a missing value are not counted as matches but stay in the
# denominator (all rows of map_data).
pct_with_label <- function(map_data, label) {
  (sum(map_data$value == label, na.rm = TRUE) / nrow(map_data)) * 100
}

# Share of counties decided by less than 10: the 2012 share minus the 2016
# share, reported as the increase in polarization at that level.
pol_2016 <- pct_with_label(polarization10, "<10% Margin")
pol_2012 <- pct_with_label(polarization102, "<10% Margin")
increase_pol10 <- pol_2012 - pol_2016
cat("% Counties Polarization Increase @ 10% Level: ", increase_pol10, "\n")
## % Counties Polarization Increase @ 10% Level: 7.326478

# Same comparison at the 20 level.
pol_20162 <- pct_with_label(polarization20, "<20% Margin")
pol_20122 <- pct_with_label(polarization202, "<20% Margin")
increase_pol20 <- pol_20122 - pol_20162
cat("% Counties Polarization Increase @ 20% Level: ", increase_pol20, "\n")
## % Counties Polarization Increase @ 20% Level: 13.81748

# Shares taken from polarization20 / polarization10 (the 2016 margins).
cat("% Counties that were within 20% points: ", pol_20162, "\n")
## % Counties that were within 20% points: 22.01157
cat("% Counties that were within 10% points: ", pol_2016, "\n")
## % Counties that were within 10% points: 10.50771
# Vote-weighted average county margin for each election: every county's
# margin is weighted by its total votes, summed, and divided by the overall
# vote total. The per-county products are stored on votes as AvgPol_<year>.
votes[["AvgPol_2016"]] <- votes[["total_votes_2016"]] * votes[["margin_2016"]]
votes[["AvgPol_2012"]] <- votes[["total_votes_2012"]] * votes[["margin_2012"]]

weighted_total_2016 <- sum(votes[["AvgPol_2016"]])
polarization_2016 <- weighted_total_2016 / sum(votes[["total_votes_2016"]])
cat('Polarization 2016: ',polarization_2016,'\n')
## Polarization 2016: 27.88361

weighted_total_2012 <- sum(votes[["AvgPol_2012"]])
polarization_2012 <- weighted_total_2012 / sum(votes[["total_votes_2012"]])
cat('Polarization 2012:', polarization_2012,'\n')
## Polarization 2012: 24.21305

# Change in the weighted average margin from 2012 to 2016.
pol_change <- polarization_2016 - polarization_2012
cat('Polarization Change: ',pol_change,'\n')
## Polarization Change: 3.670557