forked from TheEconomist/big-mac-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data-generator-v2.R
111 lines (91 loc) · 5.23 KB
/
data-generator-v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
library('tidyverse')
library('data.table')
big_mac_countries = c('ARG', 'AUS', 'BRA', 'GBR', 'CAN', 'CHL', 'CHN', 'CZE', 'DNK',
'EGY', 'HKG', 'HUN', 'IDN', 'ISR', 'JPN', 'MYS', 'MEX', 'NZL',
'NOR', 'PER', 'PHL', 'POL', 'RUS', 'SAU', 'SGP', 'ZAF', 'KOR',
'SWE', 'CHE', 'TWN', 'THA', 'TUR', 'ARE', 'USA', 'COL', 'CRI',
'PAK', 'LKA', 'UKR', 'URY', 'IND', 'VNM', 'GTM', 'HND', 'VEN',
'NIC', 'AZE', 'BHR', 'HRV', 'JOR', 'KWT', 'LBN', 'MDA', 'OMN',
'QAT', 'ROU', 'EUZ')
big_mac_data = fread('./source-data/big-mac-source-data-v2.csv', na.strings = '#N/A') %>%
.[!is.na(local_price)] %>% # remove lines where the local price is missing
.[,GDP_local := as.numeric(GDP_local)] %>% # convert GDP to a number
.[order(date, name)] # sort by date and then by country name, for easy reading
latest_date = big_mac_data$date %>% max
big_mac_data[, dollar_price := local_price / dollar_ex]
base_currencies = c('USD', 'EUR', 'GBP', 'JPY', 'CNY')
big_mac_index = big_mac_data[
!is.na(dollar_price) & iso_a3 %in% big_mac_countries
,.(date, iso_a3, currency_code, name, local_price, dollar_ex, dollar_price)]
for(currency in base_currencies) {
big_mac_index[
, # we don't want a subset, so our first argument is blank
(currency) := # we'll add a new column named for the base set
dollar_price / # we divide the dollar price in each row by
# the dollar price on the *base currency*'s row (.SD is a data.table
.SD[currency_code == currency]$dollar_price - # that contains only the current group)
1, # one means parity (neither over- nor under-valued), so we subtract one
# to get an over/under-valuation value
by=date # and of course, we'll group these rows by date
]
}
big_mac_index[, (base_currencies) := round(.SD, 5L), .SDcols=base_currencies]
to_plot = big_mac_index[date == latest_date]
to_plot$name = factor(to_plot$name, levels=to_plot$name[order(to_plot$USD)])
fwrite(big_mac_index, './output-data/big-mac-raw-index.csv')
big_mac_gdp_data = big_mac_data[GDP_local > 0]
regression_countries = c('ARG', 'AUS', 'BRA', 'GBR', 'CAN', 'CHL', 'CHN', 'CZE', 'DNK',
'EGY', 'EUZ', 'HKG', 'HUN', 'IDN', 'ISR', 'JPN', 'MYS', 'MEX',
'NZL', 'NOR', 'PER', 'PHL', 'POL', 'RUS', 'SAU', 'SGP', 'ZAF',
'KOR', 'SWE', 'CHE', 'TWN', 'THA', 'TUR', 'USA', 'COL', 'PAK',
'IND', 'AUT', 'BEL', 'NLD', 'FIN', 'FRA', 'DEU', 'IRL', 'ITA',
'PRT', 'ESP', 'GRC', 'EST')
# in 2021, we added a number of additional countries to the adjusted index
regression_addons_2021 = c('ARE', 'CRI', 'LKA', 'UKR', 'URY', 'VNM', 'GTM', 'HND', 'NIC',
'AZE', 'BHR', 'HRV', 'JOR', 'KWT', 'MDA', 'OMN', 'QAT', 'ROU',
'SVK', 'SVN', 'LVA', 'LTU')
big_mac_gdp_data = big_mac_gdp_data[iso_a3 %in% regression_countries |
(iso_a3 %in% regression_addons_2021 & date >= as.Date('2021-01-01'))
]
big_mac_gdp_data %>%
.[,GDP_bigmac := GDP_local / (local_price / .SD[iso_a3=='USA']$local_price), by=date]
big_mac_gdp_data[
,
`:=`(
adj_price=lm(dollar_price ~ GDP_bigmac) %>% predict
# adj_price_USD=lm(dollar_price ~ GDP_dollar) %>% predict
),
by = date]
big_mac_adj_index = big_mac_gdp_data[
!is.na(dollar_price) &
(
iso_a3 %in% regression_countries |
iso_a3 %in% regression_addons_2021 & date >= '2021-01-01'
) &
iso_a3 %in% big_mac_countries
,.(date, iso_a3, currency_code, name, local_price, dollar_ex, dollar_price, GDP_bigmac, adj_price)]
for(currency in base_currencies) {
big_mac_adj_index[
, # we don't want a subset, so our first argument is blank
(currency) := # we'll add a new column named for the base set
( # we divide the dollar price by the adjusted price to get
dollar_price / adj_price # the deviation from our expectation by
) /
# the same figure from the *base currency*'s rowa\
(
.SD[currency_code == currency]$dollar_price /
.SD[currency_code == currency]$adj_price
) -
1, # one means parity (neither over- nor under-valued), so we subtract one
# to get an over/under-valuation value
by=date # and of course, we'll group these rows by date
]
}
big_mac_adj_index[, (base_currencies) := round(.SD, 5L), .SDcols=base_currencies]
fwrite(big_mac_adj_index, './output-data/big-mac-adjusted-index.csv')
big_mac_full_index = merge(big_mac_index, big_mac_adj_index,
by=c('date', 'iso_a3', 'currency_code', 'name', 'local_price', 'dollar_ex', 'dollar_price'),
suffixes=c('_raw', '_adjusted'),
all.x=TRUE
)
fwrite(big_mac_full_index, './output-data/big-mac-full-index.csv')