-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore_population.Rmd
110 lines (90 loc) · 3.16 KB
/
explore_population.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
---
title: "R Notebook"
output:
  pdf_document: default
  html_notebook: default
---
```{r}
library(ggplot2)
library(reshape2) # melt
```
# Explore Population Data
## Total Population
```{r}
# Load the population table (the file name suggests UK projections for 2020).
# This chunk reads its Total, Females and Males columns.
pop_data <- read.csv("data/population/UK_Projected_Population_2020.csv")

# Headline figures summed over every row of the table.
total_pop <- sum(pop_data$Total)
female_to_male <- sum(pop_data$Females) / sum(pop_data$Males)

# paste() already separates its arguments with one space, so the label must
# not end in a space of its own (that printed two spaces after the colon).
paste("UK wide Female to Male Ratio:", round(female_to_male, 3))
```
```{r}
# Reshape to long form: one row per Age/sex pair, with the sex in `variable`
# and the count in `value`, so the two sexes can be drawn as grouped bars.
long_data <- melt(
  data = pop_data,
  id.vars = "Age",
  measure.vars = c("Females", "Males")
)

# Side-by-side bars of population by age. The x-axis labels are rotated to
# vertical; the theme() layer is applied once (it was previously duplicated).
pop_by_age <- ggplot(
  data = long_data,
  aes(x = Age, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ylab("Population")
pop_by_age
```
## Gender Ratio
```{r}
# Female:Male ratio for each row of pop_data (one row per Age value).
female_to_male <- pop_data$Females / pop_data$Males

# Label only the bars whose rounded ratio is the minimum or the maximum;
# every other bar gets an empty label.
bar_labels <- round(female_to_male, 2)
is_extreme <- bar_labels == max(bar_labels) | bar_labels == min(bar_labels)
bar_labels[which(!is_extreme)] <- ""

# barplot() returns the x position of each bar's midpoint; keep it so the
# labels can be placed above the matching bars.
bar_mids <- barplot(
  female_to_male,
  ylim = c(0.5, max(female_to_male) * 1.1),
  xpd = FALSE,
  names.arg = pop_data$Age,
  las = 2,
  ylab = "Female:Male"
)

# Base graphics draw as a side effect and return NULL, so these are plain
# statements rather than ggplot-style `+` layers.
text(
  x = bar_mids,
  y = female_to_male,
  labels = bar_labels,
  pos = 3,
  cex = 0.8,
  col = "red"
)
abline(h = 1, col = "red") # reference line at parity (ratio of 1)
```
## Regional
```{r}
# Read the per-region population table and preview its first rows.
regional_csv <- "data/population/ENG_WAL_Population_Region_2019.csv"
regional_data <- read.csv(file = regional_csv)
head(regional_data, n = 6L)
```
```{r}
# Reshape to long form: one row per Region/sex pair, with the sex in
# `variable` and the count in `value`, ready for grouped bars.
long_data <- melt(
  data = regional_data,
  id.vars = "Region",
  measure.vars = c("Females", "Males")
)
long_data

# Side-by-side bars of population by region. The x-axis labels are rotated to
# vertical; the theme() layer is applied once (it was previously duplicated).
pop_by_region <- ggplot(
  data = long_data,
  aes(x = Region, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ylab("Population")
pop_by_region
```
## Ethnicity
```{r}
# Read the region-by-ethnicity table and preview its first rows.
ethnic_csv <- "data/population/ENG_WAL_Population_Region_Ethnicity.csv"
ethnic_data <- read.csv(file = ethnic_csv)
head(ethnic_data, n = 6L)
```
```{r}
# Estimate the population of each ethnicity in each region by applying the
# percentages in ethnic_data to the regional totals in regional_data.
#
# A column whose name is held in a variable is read with [[ ]], e.g.
#   r <- "East"
#   ethnic_data[[r]]

# Output column labels, one per row of ethnic_data.
# NOTE(review): the order is assumed to match the rows of ethnic_data
# (ethnic_data$X) - confirm against the CSV.
ethnicity_cols <- c(
  "Asian", "Black", "White.British", "Mixed", "White.other", "Other"
)
stopifnot(nrow(ethnic_data) == length(ethnicity_cols))

# as.character() mirrors how a for loop would iterate over a factor column.
region_names <- as.character(regional_data$Region)

# Integer estimates for the k-th region, one value per ethnicity row.
estimate_region <- function(k) {
  # Region names containing spaces appear with dots in the ethnic_data
  # header (read.csv() makes column names syntactic by default).
  region_col <- gsub(" ", ".", region_names[k])
  percent <- ethnic_data[[region_col]]
  if (is.null(percent)) {
    # Keep the region as a row of NAs, but say so instead of failing silently.
    warning(
      "No ethnicity column found for region: ", region_names[k],
      call. = FALSE
    )
    return(rep(NA_integer_, length(ethnicity_cols)))
  }
  as.integer(round(percent / 100 * regional_data$Total[k]))
}

# vapply() returns one column per region; transpose to one row per region.
estimate_matrix <- t(
  vapply(
    seq_along(region_names),
    estimate_region,
    integer(length(ethnicity_cols))
  )
)
colnames(estimate_matrix) <- ethnicity_cols

estimated_data <- data.frame(
  Region = region_names,
  estimate_matrix,
  stringsAsFactors = FALSE
)
estimated_data
```
```{r}
# Reshape to long form: one row per Region/ethnicity pair, with the ethnicity
# in `variable` and the estimate in `value`, ready for stacked bars.
long_data <- melt(
  data = estimated_data,
  id.vars = "Region",
  measure.vars = colnames(estimated_data)[2:7]
)
long_data

# Stacked bars (geom_bar's default position) of estimated population by
# region. The x-axis labels are rotated to vertical; the theme() layer is
# applied once (it was previously duplicated).
pop_by_ethnicity <- ggplot(
  data = long_data,
  aes(x = Region, y = value, fill = variable)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  ylab("Estimated Population")
pop_by_ethnicity
```