Skip to content

Commit 04affb1

Browse files
committed
rebuild to fix double counting bug
1 parent 0a6eddc commit 04affb1

36 files changed

+365
-81
lines changed

RScripts/build_passender_od.R

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ library(sf)
33
library(tidyr)
44

55
airports <- read_sf("data/airports_pass.gpkg")
6-
pass_int_od <- readRDS("data/CAA_int_od_clean.Rds")
7-
pass_dom_od <- readRDS("data/CAA_dom_od_clean.Rds")
6+
pass_int_od <- readRDS("data/CAA_int_od_clean_v2.Rds")
7+
pass_dom_od <- readRDS("data/CAA_dom_od_clean_v2.Rds")
88

99

1010
head(pass_dom_od)
@@ -37,10 +37,10 @@ pass_od <- pass_od[,c("year","airport1","airport1_country","airport2","airport2_
3737
# come from two airports that seem to be the same location
3838

3939
pass_od <- pass_od %>%
40-
group_by(year,airport1,airport1_country,airport2,airport2_country,airport2) %>%
40+
group_by(year,airport1,airport1_country,airport2,airport2_country) %>%
4141
summarise(total_pax = sum(total_pax))
4242

4343

4444
pass_od_wide <- pivot_wider(pass_od, names_from = "year", values_from = "total_pax")
4545

46-
saveRDS(pass_od_wide, "data/passenger_od_wide.Rds")
46+
saveRDS(pass_od_wide, "data/passenger_od_wide_v2.Rds")

RScripts/clean_passenger_data.R

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@ tmap_mode("view")
66
# Read in
77

88
#pass_transit <- readRDS("data/CAA_transit.Rds")
9-
pass_int_od <- readRDS("data/CAA_int_od.Rds")
10-
pass_dom_od <- readRDS("data/CAA_dom_od.Rds")
9+
pass_int_od <- readRDS("data/CAA_int_od_v2.Rds")
10+
pass_dom_od <- readRDS("data/CAA_dom_od_v2.Rds")
1111

1212
airports_fixed <- readRDS("airports_final.Rds")
1313
airports2 <- readRDS("airports.Rds")
@@ -458,9 +458,9 @@ pass_dom_od <- pass_dom_od %>%
458458
pass_dom_od$key <- NULL
459459

460460

461-
write_sf(airports_all,"data/airports_pass.gpkg")
462-
saveRDS(pass_int_od, "data/CAA_int_od_clean.Rds")
463-
saveRDS(pass_dom_od, "data/CAA_dom_od_clean.Rds")
461+
write_sf(airports_all,"data/airports_pass_v2.gpkg")
462+
saveRDS(pass_int_od, "data/CAA_int_od_clean_v2.Rds")
463+
saveRDS(pass_dom_od, "data/CAA_dom_od_clean_v2.Rds")
464464

465465
qtm(airports_all)
466466

RScripts/import_airport_data.R

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ for(i in 1:length(zips)){
157157

158158
for(i in seq(length(zips) + 1, length(zips) + 4)){
159159
dir <- i - length(zips) + 2014
160+
message(dir)
160161
files_csv <- list.files(file.path(path, dir), pattern = ".csv", full.names = TRUE)
161162
files <- files_csv
162163

@@ -189,10 +190,18 @@ for(i in seq(length(zips) + 1, length(zips) + 4)){
189190
}
190191
names(int_od) <- c("year","uk_airport","foreign_country","foreign_airport","total_pax", "scheduled_pax","charter_pax")
191192

192-
dom_od <- files[grep("dom_air_pax_route", files,ignore.case = TRUE)]
193+
dom_od <- files[grep("domestic_air_pax_traffic_route", files,ignore.case = TRUE)]
193194
if(length(dom_od) > 1){
194195
dom_od <- dom_od[1]
195196
}
197+
198+
if(length(dom_od) == 0){
199+
dom_od <- files[grep("dom_air_pax_route", files,ignore.case = TRUE)]
200+
if(length(dom_od) > 1){
201+
dom_od <- dom_od[1]
202+
}
203+
}
204+
196205
dom_od <- readr::read_csv(dom_od)
197206
if(all(c("this_period","apt1_apt_name","apt2_apt_name","total_pax_tp", "total_pax_shd_tp", "total_pax_cht_tp") %in% names(dom_od))){
198207
dom_od <- dom_od[,c("this_period","apt1_apt_name","apt2_apt_name","total_pax_tp", "total_pax_shd_tp", "total_pax_cht_tp")]
@@ -222,6 +231,6 @@ transit <- dplyr::bind_rows(res_transit)
222231
int_od <- dplyr::bind_rows(res_int_od)
223232
dom_od <- dplyr::bind_rows(res_dom_od[lengths(res_dom_od) >0])
224233

225-
saveRDS(transit, "data/CAA_transit.Rds")
226-
saveRDS(int_od, "data/CAA_int_od.Rds")
227-
saveRDS(dom_od, "data/CAA_dom_od.Rds")
234+
saveRDS(transit, "data/CAA_transit_v2.Rds")
235+
saveRDS(int_od, "data/CAA_int_od_v2.Rds")
236+
saveRDS(dom_od, "data/CAA_dom_od_v2.Rds")

RScripts/merge_pass_flight_ods.R

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ library(tmap)
44
tmap_mode("view")
55

66
# Load Passenger OD Data
7-
pass_od <- readRDS("data/passenger_od_wide.Rds")
7+
pass_od <- readRDS("data/passenger_od_wide_v2.Rds")
88
pass_od <- pass_od[!pass_od$airport2 %in% c("=","Unknown"),]
99
pass_od$airport1 <- gsub(" International","",pass_od$airport1)
1010
pass_od$airport2 <- gsub(" International","",pass_od$airport2)

RScripts/new_method/OD2linestring.R

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ library(tmap)
33
library(dplyr)
44
tmap_mode("view")
55

6-
od <- readRDS("data/clean/pass_flighs_od.Rds")
7-
airports <- read_sf("data/clean/airports_clean_second_pass.gpkg")
6+
od <- readRDS("data/clean/pass_flighs_od_v2.Rds")
7+
airports <- read_sf("data/clean/airports_clean_second_pass_v2.gpkg")
88

99

1010
qtm(airports)
@@ -53,13 +53,18 @@ line <- do.call(c, line)
5353
line <- st_segmentize(line, units::set_units(1, km))
5454
od_good <- as.data.frame(od_good)
5555
od_good$geometry <- line
56+
#stop()
57+
#od_good$length2 <- round(geodist::geodist(st_coordinates(od_good$geom_from), st_coordinates(od_good$geom_to), paired = TRUE, measure = "geodesic") / 1000, 1)
5658
od_good$geom_from <- NULL
5759
od_good$geom_to <- NULL
5860

5961
od_good <- st_as_sf(od_good, crs = 4326)
6062

6163
od_good$length_km <- round(as.numeric(st_length(od_good)) / 1000, 1)
6264

65+
66+
67+
6368
od_good$pass_km_2018 <- od_good$`2018` * od_good$length_km
6469

6570
od_top <- od_good[!is.na(od_good$pass_km_2018),]
@@ -101,4 +106,4 @@ ggplot(paris_sum, aes(Year, Passengers, colour = Airport)) +
101106
scale_x_continuous(breaks = seq(1990,2018,2))
102107

103108
qtm(od_top, lines.col = "pass_km_2018")
104-
write_sf(od_good, "data/clean/od_flights_pass.gpkg")
109+
write_sf(od_good, "data/clean/od_flights_pass_v2.gpkg")

RScripts/new_method/combine_flights_passengers.R

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
library(sf)
22
library(dplyr)
33

4-
pass_od <- readRDS("data/clean/passenger_od_wide.Rds")
4+
pass_od <- readRDS("data/passenger_od_wide_v2.Rds")
55
flight_od <- readRDS("data/clean/flights_od_prepped.Rds")
66

77
# Load Airports
@@ -447,6 +447,6 @@ flight_od2 <- flight_od %>%
447447
pass_od2 <- pass_od2[rowSums(pass_od2[,as.character(1990:2018)]) != 0,]
448448
flight_od2 <- flight_od2[rowSums(flight_od2[,paste0("flt_",1990:2018)]) != 0,]
449449

450-
saveRDS(pass_od2, "data/clean/passenger_od_first_clean.Rds")
451-
saveRDS(flight_od2, "data/clean/flighs_od_first_clean.Rds")
452-
write_sf(airports, "data/clean/airports_clean_first_pass.gpkg")
450+
saveRDS(pass_od2, "data/clean/passenger_od_first_clean_v2.Rds")
451+
saveRDS(flight_od2, "data/clean/flighs_od_first_clean_v2.Rds")
452+
write_sf(airports, "data/clean/airports_clean_first_pass_v2.gpkg")

RScripts/new_method/combine_flights_passengers_secondpass.R

Lines changed: 43 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,20 @@
22
library(sf)
33
library(dplyr)
44

5-
pass_od <- readRDS("data/clean/passenger_od_first_clean.Rds")
6-
flight_od <- readRDS("data/clean/flighs_od_first_clean.Rds")
7-
airports <- read_sf("data/clean/airports_clean_first_pass.gpkg")
5+
pass_od <- readRDS("data/clean/passenger_od_first_clean_v2.Rds")
6+
flight_od <- readRDS("data/clean/flighs_od_first_clean_v2.Rds")
7+
airports <- read_sf("data/clean/airports_clean_first_pass_v2.gpkg")
88
airports <- airports[airports$airport != "Vary (Chalons Sur Marne)",]
99

10-
airports_extra <- data.frame(airport = c("Tabarka","St Maarten","Sydney Canada","Lanseria","Izmir (Cumaovasi)"),
11-
country = c("Tunisia","St Maarten","Canada","Republic of South Africa","Turkey"),
10+
airports_extra <- data.frame(airport = c("Tabarka","St Maarten","Sydney Canada","Lanseria","Izmir (Cumaovasi)","Islamabad"),
11+
country = c("Tunisia","St Maarten","Canada","Republic of South Africa","Turkey","Pakistan"),
1212
geometry = st_sfc(list(
1313
st_point(c(8.876944, 36.98)),
1414
st_point(c(-63.109444, 18.040833)),
1515
st_point(c(-60.048056, 46.161389)),
1616
st_point(c(27.926111, -25.938611)),
17-
st_point(c(27.33114, 37.95034))
17+
st_point(c(27.33114, 37.95034)),
18+
st_point(c(72.82565, 33.549083))
1819
)))
1920
names(airports_extra) <- names(airports)
2021

@@ -29,7 +30,7 @@ airports$geom[airports$airport == "Seoul Afb"] <- st_point(c(127.113889, 37.4458
2930
airports$geom[airports$airport == "Seoul (Kimpo)"] <- st_point(c(126.790556, 37.558056))
3031
airports$geom[airports$airport == "Tollerton Nottingham"] <- st_point(c(-1.080855, 52.91872))
3132
airports$geom[airports$airport == "Vagar"] <- st_point(c(-7.27546, 62.06333))
32-
airports$geom[airports$airport == "Benazir Bhutto"] <- st_point(c(72.83501, 33.55693))
33+
airports$geom[airports$airport == "Benazir Bhutto"] <- st_point(c(73.099167, 33.616389))
3334
airports$geom[airports$airport == "Hong Kong"] <- st_point(c(113.9185, 22.30805))
3435
airports$geom[airports$airport == "Camp Springs (Andrews Afb)"] <- st_point(c(-76.88363, 38.79652))
3536
airports$geom[airports$airport == "Oil Rigs"] <- st_point(c(0.953009, 58.238252))
@@ -127,6 +128,37 @@ tidy_airports("Bursa/Yenisehir","Bursa Yenisehir")
127128
tidy_airports("Corlu (Afb)","Tekirdag (Corlu)")
128129
tidy_airports("Cagliari","Cagliari (Elmas)")
129130

131+
tidy_airports("Los Angeles International","Los Angeles")
132+
tidy_airports("Abu Dhabi International","Abu Dhabi")
133+
tidy_airports("Miami International","Miami")
134+
tidy_airports("Philadelphia International","Philadelphia")
135+
136+
tidy_airports("Denver International","Denver")
137+
tidy_airports("Benazir Bhutto International","Benazir Bhutto")
138+
tidy_airports("Guangzhou Baiyun International","Guangzhou Baiyun")
139+
tidy_airports("Halifax Int","Halifax")
140+
tidy_airports("Ottawa International","Ottawa")
141+
tidy_airports("Tarbes-Lourdes International","Tarbes-Lourdes")
142+
tidy_airports("Islamabad International","Islamabad")
143+
tidy_airports("Jakarta (Soekarno-Hatta Intnl)","Jakarta (Soekarno-Hatta)")
144+
tidy_airports("Jakarta (Soekarno-Hattanl)","Jakarta (Soekarno-Hatta)")
145+
tidy_airports("Changsha Huanghua International","Changsha Huanghua")
146+
tidy_airports("Wuhan Tianhe International","Wuhan Tianhe")
147+
148+
tidy_airports("Male International","Male")
149+
tidy_airports("Chongqing Jiangbei International","Chongqing Jiangbei")
150+
tidy_airports("Auckland International","Auckland")
151+
tidy_airports("Phu Quoc International","Phu Quoc")
152+
tidy_airports("Sanya Phoenix International","Sanya Phoenix")
153+
tidy_airports("Kharkov Osnova Intl","Kharkov Osnova")
154+
tidy_airports("Windsor Locks Bradley Intl","Windsor Locks Bradley")
155+
tidy_airports("East Midlands International","East Midlands")
156+
tidy_airports("Nottingham East Midlands Int'l","East Midlands")
157+
tidy_airports("Manston (Kent Int)","Kent")
158+
tidy_airports("Bali International","Bali")
159+
tidy_airports("Minot International","Minot")
160+
tidy_airports("Erbil International","Erbil")
161+
130162

131163
pass_od$airport2_country[pass_od$airport2 == "Pristina"] <- "Kosovo"
132164
pass_od$airport2_country[pass_od$airport2 == "Bahrain"] <- "Bahrain"
@@ -195,11 +227,11 @@ res <- res[lengths(res) != 0]
195227

196228

197229
all_od <- full_join(pass_od2, flight_od2, by= c("airport1","airport1_country","airport2","airport2_country") )
198-
saveRDS(all_od,"data/clean/pass_flighs_od.Rds")
199-
saveRDS(pass_od2, "data/clean/passenger_od_second_clean.Rds")
200-
saveRDS(flight_od, "data/clean/flighs_od_second_clean.Rds")
230+
saveRDS(all_od,"data/clean/pass_flighs_od_v2.Rds")
231+
saveRDS(pass_od2, "data/clean/passenger_od_second_clean_v2.Rds")
232+
saveRDS(flight_od, "data/clean/flighs_od_second_clean_v2.Rds")
201233
airports <- airports[!duplicated(airports),]
202-
write_sf(airports, "data/clean/airports_clean_second_pass.gpkg")
234+
write_sf(airports, "data/clean/airports_clean_second_pass_v2.gpkg")
203235

204236

205237
# Check Airport Locations
-58 Bytes
Loading
-24 Bytes
Loading

RScripts/new_method/near_OD.png

242 KB
Loading

0 commit comments

Comments
 (0)