-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnm_income.R
94 lines (74 loc) · 2.72 KB
/
nm_income.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#------------------
# Load Libraries
#------------------
library(tidyverse)
library(maps)
library(rvest)
# Register the Arial device font for Windows graphics devices.
# windowsFonts()/windowsFont() are only provided by Windows builds of
# grDevices, so the call is skipped elsewhere instead of aborting the script.
# NOTE(review): the plot theme further down requests family 'Times', not
# 'Arial' -- confirm which font is actually intended.
if (.Platform$OS.type == "windows") {
  windowsFonts(Arial = windowsFont("Arial"))
}
#---------------------------
# Import data into dataframe
#---------------------------
# Download the Wikipedia page, collect every <table> node, and convert the
# third one into a tibble.
# NOTE(review): the positional index 3 depends on the current page layout;
# re-check it if the selected columns below stop matching.
nm.income <- read_html("https://en.wikipedia.org/wiki/List_of_New_Mexico_locations_by_per_capita_income") %>%
  html_nodes("table") %>%
  .[[3]] %>%
  html_table() %>%
  as_tibble() # tbl_df() is deprecated since dplyr 1.0.0; as_tibble() replaces it

# Show the raw scraped table for inspection.
nm.income
#-------------------------
# Select only data we want
#-------------------------
# Keep only the county name and the median household income column. The
# scraped header spans three lines, so its name contains embedded newlines.
nm.household_income <- select(
  nm.income,
  "County",
  "Median\nhousehold\nincome"
)
nm.household_income

# Swap the scraped headers for short, syntactic column names.
names(nm.household_income) <- c("county", "household_income")
nm.household_income
#------------------------
# Clean data
#------------------------
# Normalise both columns so they can be joined to the map data:
#  * household_income: strip "$" and "," and convert to numeric.
#  * county: lower-case the name and replace n-tilde with a plain "n".
#    The character may arrive either intact (UTF-8 sessions) or already
#    degraded to "?" (sessions whose native encoding cannot represent it);
#    the original code only handled the "?" case, which left the name
#    unchanged in UTF-8 sessions.
nm.household_income <- nm.household_income %>%
  mutate(
    household_income = household_income %>%
      str_replace_all("[$,]", "") %>%
      as.numeric(),
    county = county %>%
      tolower() %>%
      str_replace_all("[?\u00f1]", "n")
  )
nm.household_income
# County outline data, restricted to rows whose region is "new mexico".
map.data <- filter(map_data("county"), region == "new mexico")
head(map.data)
#-----------------------------
# Compare and join data frames
#-----------------------------
# Show map rows whose subregion has no matching county in the income table.
map.data %>%
  anti_join(nm.household_income, by = c("subregion" = "county"))

# Attach household income to every map row that has a matching county.
nm.map <- map.data %>%
  inner_join(nm.household_income, by = c("subregion" = "county"))
head(nm.map)

# Summary figures for the joined income values.
nm.map$household_income %>% min()
nm.map$household_income %>% max()
nm.map$household_income %>% IQR()
#------------------------
# Map the data
#------------------------
# Choropleth of household income by county: one filled polygon per map group,
# outlined in white.
ggplot(nm.map) +
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = household_income),
    color = "white"
  ) +
  coord_fixed(ratio = 1.3) +
  # Three-colour gradient anchored at $25,000 / $65,000 / $105,000; rescale()
  # maps those anchors onto the 0-1 range that `values` expects.
  # NOTE(review): `limits` is fixed at 25,000-105,000; confirm how incomes
  # outside that range should be displayed.
  scale_fill_gradientn(
    colours = c("grey", "orange", "red"),
    values = scales::rescale(c(25000, 65000, 105000)),
    breaks = c(25000, 65000, 105000),
    labels = c("$25,000", "$65,000", "$105,000"),
    limits = c(25000, 105000)
  ) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    fill = "Income",
    title = "Household Income",
    subtitle = "New Mexico household income by county",
    x = NULL,
    y = NULL
  ) +
  # Strip axes, grid and backgrounds so only the map and legend remain.
  theme(
    text = element_text(family = "Times", color = "black"),
    panel.background = element_rect(fill = "white"),
    plot.title = element_text(size = 28),
    plot.subtitle = element_text(size = 14),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "white"),
    legend.position = "right",
    legend.background = element_blank(),
    legend.key = element_blank()
  )