-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_views.py
136 lines (127 loc) · 4.33 KB
/
create_views.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from validation import hive_connection
def create_views():
con,cur=hive_connection()
cur.execute("""
create view nos_arr_delay as
select year, month,dayofmonth,
case dayofweek
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
when 7 then 'Sunday'
end dayofweek,
count(*) as flights_delay
from data_fact_table
where
arrdelay>15
group by year,month,dayofmonth,dayofweek
""")
cur.execute("""
create view nos_dept_delay as
select year, month,dayofmonth,
case dayofweek
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
when 7 then 'Sunday'
end dayofweek,
count(*) as flights_delay
from data_fact_table
where
depdelay>15
group by year,month,dayofmonth,dayofweek
""")
cur.execute("""
create view timeinterval_dep_delay as
select year,dept_interval,count(*) as flights_delay
from
( select year,depdelay,
case
when length(cast(deptime as string))<=2 then '0-1'
when length(cast(deptime as string))=3 then substr(deptime,1,1)||'-'||cast(substr(deptime,1,1) as int)+1
when length(cast(deptime as string))=4 and deptime>=1000 and deptime<=2359 then substr(deptime,1,2)||'-'||cast(substr(deptime,1,2) as int)+1
when length(cast(deptime as string))=4 and deptime>=2500 then substr(deptime,1,2)||'-'||cast(substr(deptime,1,2) as int)+1
when length(cast(deptime as string))=4 and deptime>=2400 and deptime<=2459 then '0-1'
end dept_interval
from data_fact_table
where depdelay>15 ) as time_interval
group by year,dept_interval
having dept_interval<>'25-26' and
dept_interval<>'26-27' and
dept_interval<>'27-28' and
dept_interval<>'28-29' and
dept_interval<>'29-30'
order by flights_delay
""")
cur.execute("""
create view timeinterval_arr_delay as
select year,arr_interval,count(*) as flights_delay
from
(select year,arrdelay,
case
when length(cast(arrtime as string))<=2 then '0-1'
when length(cast(arrtime as string))=3 then substr(arrtime,1,1)||'-'||cast(substr(arrtime,1,1) as int)+1
when length(cast(arrtime as string))=4 and arrtime>=1000 and arrtime<=2359 then substr(arrtime,1,2)||'-'||cast(substr(arrtime,1,2) as int)+1
when length(cast(arrtime as string))=4 and arrtime>=2400 and arrtime<=2459 then '0-1'
when length(cast(arrtime as string))=4 and arrtime>=2500 then substr(arrtime,1,2)||'-'||cast(substr(arrtime,1,2) as int)+1
end arr_interval
from data_fact_table
where arrdelay>15) as time_interval
group by year,arr_interval
having arr_interval<>'25-26' and
arr_interval<>'26-27' and
arr_interval<>'27-28' and
arr_interval<>'28-29' and
arr_interval<>'29-30'
order by flights_delay
""")
cur.execute("""
create view airport_depdelay as
select year,origin,name, count(*) as flights_delay
from
(select year,origin,depdelay,iata,name,city,state,country,lat,long
from data_fact_table f
left join airports_dim_table a on f.origin=a.iata) as airport_delay
where depdelay>15
group by year,origin,name
order by flights_delay desc
""")
cur.execute("""
create view airport_arrdelay as
select year,dest,name, count(*) as flights_delay
from
(select year,dest,arrdelay,iata,name,city,state,country,lat,long
from data_fact_table f
left join airports_dim_table a on f.dest=a.iata) as airport_delay
where arrdelay>15
group by year,dest,name
order by flights_delay desc
""")
cur.execute("""
create view carrier_depdelay as
select year,uniquecarrier,name,count(*) as flights_delay
from
(select year,uniquecarrier, depdelay,name
from data_fact_table f
left join carriers_dim_table c on f.uniquecarrier=c.code) as carrier_delay
where depdelay>15
group by year,uniquecarrier,name
order by flights_delay
""")
cur.execute("""
create view oldplane_depdelay as
select year,count(*) as flights_delay
from
(select depdelay,p.year
from data_fact_table f
left join plane_dim_table p on f.tailnum=p.tailno) as oldplane_delay
where depdelay>15
group by year
order by flights_delay
""")