Skip to content

Commit 1f42cd3

Browse files
authored
Merge pull request #782 from wsp-sag/data-type-op-pd-cat
Data Type Optimization
2 parents a2ad2a1 + db03dae commit 1f42cd3

File tree

63 files changed

+797
-145
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+797
-145
lines changed

activitysim/abm/models/atwork_subtour_destination.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,21 @@ def atwork_subtour_destination(
8989
estimator.end_estimation()
9090

9191
subtours[destination_column_name] = choices_df["choice"]
92-
assign_in_place(tours, subtours[[destination_column_name]])
92+
assign_in_place(
93+
tours,
94+
subtours[[destination_column_name]],
95+
state.settings.downcast_int,
96+
state.settings.downcast_float,
97+
)
9398

9499
if want_logsums:
95100
subtours[logsum_column_name] = choices_df["logsum"]
96-
assign_in_place(tours, subtours[[logsum_column_name]])
101+
assign_in_place(
102+
tours,
103+
subtours[[logsum_column_name]],
104+
state.settings.downcast_int,
105+
state.settings.downcast_float,
106+
)
97107

98108
state.add_table("tours", tours)
99109

activitysim/abm/models/atwork_subtour_frequency.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,15 @@
2424

2525
def add_null_results(state, trace_label, tours):
    """Add an empty ``atwork_subtour_frequency`` column and store ``tours``.

    Used when the model is skipped: every row of ``tours`` gets the empty
    string as its frequency, stored as an unordered categorical whose only
    category is ``""``.

    Parameters
    ----------
    state
        Object providing ``add_table(name, table)``; receives the updated
        table under the name ``"tours"``.
    trace_label : str
        Label interpolated into the "Skipping ..." log message.
    tours : pandas.DataFrame
        Table to annotate. It is modified in place.
    """
    logger.info("Skipping %s: add_null_results", trace_label)

    # The empty string is the only value that can occur on this path, so it
    # is the only category declared.
    cat_type = pd.api.types.CategoricalDtype([""], ordered=False)

    # No ``choices`` series exists in this function; casting one here (as an
    # earlier revision did) raised UnboundLocalError before the column could
    # be written, so only the ``tours`` column is converted.
    tours["atwork_subtour_frequency"] = ""
    tours["atwork_subtour_frequency"] = tours["atwork_subtour_frequency"].astype(
        cat_type
    )

    state.add_table("tours", tours)
2937

3038

@@ -117,6 +125,11 @@ def atwork_subtour_frequency(
117125

118126
# convert indexes to alternative names
119127
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
128+
cat_type = pd.api.types.CategoricalDtype(
129+
alternatives.index.tolist() + [""],
130+
ordered=False,
131+
)
132+
choices = choices.astype(cat_type)
120133

121134
if estimator:
122135
estimator.write_choices(choices)
@@ -137,6 +150,12 @@ def atwork_subtour_frequency(
137150

138151
subtours = process_atwork_subtours(state, work_tours, alternatives)
139152

153+
# convert purpose to pandas categoricals
154+
purpose_type = pd.api.types.CategoricalDtype(
155+
alternatives.columns.tolist() + ["atwork"], ordered=False
156+
)
157+
subtours["tour_type"] = subtours["tour_type"].astype(purpose_type)
158+
140159
tours = state.extend_table("tours", subtours)
141160

142161
state.tracing.register_traceable_table("tours", subtours)

activitysim/abm/models/atwork_subtour_mode_choice.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,9 @@ def atwork_subtour_mode_choice(
190190
"%s choices" % trace_label, choices_df[mode_column_name], value_counts=True
191191
)
192192

193-
assign_in_place(tours, choices_df)
193+
assign_in_place(
194+
tours, choices_df, state.settings.downcast_int, state.settings.downcast_float
195+
)
194196
state.add_table("tours", tours)
195197

196198
# - annotate tours table

activitysim/abm/models/atwork_subtour_scheduling.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ def atwork_subtour_scheduling(
111111
choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
112112
)
113113

114-
assign_in_place(tours, tdd_choices)
114+
assign_in_place(
115+
tours, tdd_choices, state.settings.downcast_int, state.settings.downcast_float
116+
)
115117
state.add_table("tours", tours)
116118

117119
if trace_hh_id:

activitysim/abm/models/cdap.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ def cdap_simulate(
219219
estimator.end_estimation()
220220

221221
choices = choices.reindex(persons.index)
222+
cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False)
223+
choices = choices.astype(cap_cat_type)
222224
persons["cdap_activity"] = choices
223225

224226
expressions.assign_columns(

activitysim/abm/models/joint_tour_composition.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
def add_null_results(state, trace_label, tours):
    """Add an empty ``composition`` column and store ``tours``.

    Used when the model is skipped: every row of ``tours`` gets the empty
    string as its composition, stored as an unordered categorical with the
    categories ``""``, ``"adults"``, ``"children"`` and ``"mixed"``.

    Parameters
    ----------
    state
        Object providing ``add_table(name, table)``; receives the updated
        table under the name ``"tours"``.
    trace_label : str
        Label interpolated into the "Skipping ..." log message.
    tours : pandas.DataFrame
        Table to annotate. It is modified in place.
    """
    # Pass the label as a logging argument so the message is only formatted
    # when the record is actually emitted.
    logger.info("Skipping %s: add_null_results", trace_label)

    tours["composition"] = ""
    cat_type = pd.api.types.CategoricalDtype(
        ["", "adults", "children", "mixed"], ordered=False
    )
    tours["composition"] = tours["composition"].astype(cat_type)

    state.add_table("tours", tours)
2832

2933

@@ -123,6 +127,10 @@ def joint_tour_composition(
123127

124128
# convert indexes to alternative names
125129
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
130+
cat_type = pd.api.types.CategoricalDtype(
131+
model_spec.columns.tolist() + [""], ordered=False
132+
)
133+
choices = choices.astype(cat_type)
126134

127135
if estimator:
128136
estimator.write_choices(choices)
@@ -134,7 +142,7 @@ def joint_tour_composition(
134142
joint_tours["composition"] = choices
135143

136144
# reindex since we ran model on a subset of households
137-
tours["composition"] = choices.reindex(tours.index).fillna("").astype(str)
145+
tours["composition"] = choices.reindex(tours.index).fillna("")
138146
state.add_table("tours", tours)
139147

140148
tracing.print_summary(

activitysim/abm/models/joint_tour_destination.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,22 @@ def joint_tour_destination(
8787

8888
# add column as we want joint_tours table for tracing.
8989
joint_tours["destination"] = choices_df.choice
90-
assign_in_place(tours, joint_tours[["destination"]])
90+
assign_in_place(
91+
tours,
92+
joint_tours[["destination"]],
93+
state.settings.downcast_int,
94+
state.settings.downcast_float,
95+
)
9196
state.add_table("tours", tours)
9297

9398
if want_logsums:
9499
joint_tours[logsum_column_name] = choices_df["logsum"]
95-
assign_in_place(tours, joint_tours[[logsum_column_name]])
100+
assign_in_place(
101+
tours,
102+
joint_tours[[logsum_column_name]],
103+
state.settings.downcast_int,
104+
state.settings.downcast_float,
105+
)
96106

97107
tracing.print_summary("destination", joint_tours.destination, describe=True)
98108

activitysim/abm/models/joint_tour_frequency.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ def joint_tour_frequency(
116116

117117
# convert indexes to alternative names
118118
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
119+
cat_type = pd.api.types.CategoricalDtype(
120+
model_spec.columns.tolist(),
121+
ordered=False,
122+
)
123+
choices = choices.astype(cat_type)
119124

120125
if estimator:
121126
estimator.write_choices(choices)
@@ -138,6 +143,12 @@ def joint_tour_frequency(
138143

139144
joint_tours = process_joint_tours(state, choices, alternatives, temp_point_persons)
140145

146+
# convert purpose to pandas categoricals
147+
purpose_type = pd.api.types.CategoricalDtype(
148+
alternatives.columns.tolist(), ordered=False
149+
)
150+
joint_tours["tour_type"] = joint_tours["tour_type"].astype(purpose_type)
151+
141152
tours = state.extend_table("tours", joint_tours)
142153

143154
state.tracing.register_traceable_table("tours", joint_tours)
@@ -147,8 +158,8 @@ def joint_tour_frequency(
147158

148159
# we expect there to be an alt with no tours - which we can use to backfill non-travelers
149160
no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
150-
households["joint_tour_frequency"] = (
151-
choices.reindex(households.index).fillna(no_tours_alt).astype(str)
161+
households["joint_tour_frequency"] = choices.reindex(households.index).fillna(
162+
no_tours_alt
152163
)
153164

154165
households["num_hh_joint_tours"] = (

activitysim/abm/models/joint_tour_participation.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def get_tour_satisfaction(candidates, participate):
9292

9393
x = (
9494
candidates[cols]
95-
.groupby(["tour_id", "composition"])
95+
.groupby(["tour_id", "composition"], observed=True)
9696
.agg(
9797
participants=("adult", "size"),
9898
adults=("adult", "sum"),
@@ -475,7 +475,12 @@ def joint_tour_participation(
475475
# update number_of_participants which was initialized to 1
476476
joint_tours["number_of_participants"] = participants.groupby("tour_id").size()
477477

478-
assign_in_place(tours, joint_tours[["person_id", "number_of_participants"]])
478+
assign_in_place(
479+
tours,
480+
joint_tours[["person_id", "number_of_participants"]],
481+
state.settings.downcast_int,
482+
state.settings.downcast_float,
483+
)
479484

480485
state.add_table("tours", tours)
481486

activitysim/abm/models/joint_tour_scheduling.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,9 @@ def joint_tour_scheduling(
161161
choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
162162
)
163163

164-
assign_in_place(tours, choices)
164+
assign_in_place(
165+
tours, choices, state.settings.downcast_int, state.settings.downcast_float
166+
)
165167
state.add_table("tours", tours)
166168

167169
# updated df for tracing

0 commit comments

Comments
 (0)