-
Notifications
You must be signed in to change notification settings - Fork 0
/
TR_ACS.R
53 lines (43 loc) · 2.51 KB
/
TR_ACS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
### *** Tech Rebalanced 2021 - ACS Microdata exercise *** ###
### by Hayley Brown
# Set the working directory to the folder that holds the downloaded data file.
# Replace the placeholder path below with your own before running the script.
setwd("/your/path/to/your/data/")
# On Windows, write the path with forward slashes ("C:/Users/me/data") or with
# doubled backslashes ("C:\\Users\\me\\data"). A single backslash starts an
# escape sequence inside an R string, so a path pasted from Explorer will fail.
# Download the data from here: https://ceprdata.org/wp-content/acs/data/cepr_acs_2019_dmv.csv.zip
# Data documentation can be found here:
## Variable names and descriptions: https://ceprdata.org/wp-content/acs/vardesc/cepr_acs_2019_varlist.log
## Variable value labels: https://ceprdata.org/wp-content/acs/vardesc/cepr_acs_2019_varlabels.log
## Stata program files used to create extracts: https://ceprdata.org/acs-uniform-data-extracts/acs-programs/
### Note that the documentation is for the full 2019 ACS 1-year sample,
#### but the data we are working with today only includes MD, DC, and VA.
#### The sample for the entire US is a much bigger file.
# load applicable packages
library(dplyr)
library(pollster)
# Load the extract straight from the zip archive: unz() gives read.csv() a
# connection to the CSV member inside the zip, so nothing is unpacked to disk.
acs19dmv <- read.csv(
  file = unz(
    description = "cepr_acs_2019_dmv.csv.zip",
    filename = "cepr_acs_2019_dmv.csv"
  ),
  header = TRUE,
  sep = ","
)
# Keep only the columns this exercise needs: age, race/ethnicity (wbhao),
# the commuting variable (pubtran), and the person weight (perwgt).
acs19dmv_sub <- select(acs19dmv, age, wbhao, pubtran, perwgt)
# Build a combined race/ethnicity-and-age label for the group of interest:
# people aged 50 or older who answered the commuting question (pubtran is 0
# or 1). The label is the wbhao value followed by ", age 50+".
# Every other row (under 50, no commuting answer, or a wbhao value not listed
# below) gets NA, so the label doubles as the sample definition.
acs19dmv_sub <- mutate(
  acs19dmv_sub,
  agerace = if_else(
    age >= 50 &
      pubtran %in% c(0, 1) &
      wbhao %in% c("White", "Black", "Hispanic", "AAPI", "Other"),
    paste0(wbhao, ", age 50+"),
    NA_character_
  )
)
# Run a weighted crosstab of the commuting variable by the age/race label.
# Note the missing values in this first table: rows outside the 50+ groups
# have agerace = NA. To compute shares, those rows need to go.
crosstab(acs19dmv_sub, pubtran, agerace, weight = perwgt)
# Drop every row whose agerace label is missing.
acs19dmv_sub <- acs19dmv_sub[!is.na(acs19dmv_sub$agerace), ]
# Same crosstab on the restricted sample, stored for inspection.
# The weight is important: these are person-level variables, so use perwgt;
# household-level variables would use hsgwgt instead.
# `weight` is written out in full. The abbreviation `w` only worked through
# R's partial argument matching, which breaks silently if the function ever
# gains another argument starting with "w".
# NOTE(review): crosstab() appears to report row percentages by default;
# confirm that is the share you intend, or set pct_type explicitly.
shares <- crosstab(acs19dmv_sub, pubtran, agerace, weight = perwgt)
View(shares)