Skip to content

Commit ba4a12e

Browse files
dhensle and aletzdy authored
Preprocessing & Annotation functionality (#957)
* initial batch of models with pre and post processing added
* nonmand sched and transit pass models
* first pass at preprocessing and annotate functionality in all models
* fixing bugs in jtf and trip purpose
* adding persons back in to locals_d in jtc
* model name missing in tour scheduling
* missing expressions import in tour sched prob
* ci unit tests & fixing estimation test error
* addressing review comments

---------

Co-authored-by: Ali Etezady <[email protected]>
1 parent 022775b commit ba4a12e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1288
-807
lines changed

activitysim/abm/models/atwork_subtour_destination.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pandas as pd
88

99
from activitysim.abm.models.util import tour_destination
10-
from activitysim.core import config, estimation, los, tracing, workflow
10+
from activitysim.core import config, estimation, los, tracing, workflow, expressions
1111
from activitysim.core.configuration.logit import TourLocationComponentSettings
1212
from activitysim.core.util import assign_in_place
1313

@@ -120,3 +120,11 @@ def atwork_subtour_destination(
120120
state.tracing.trace_df(
121121
tours, label="atwork_subtour_destination", columns=["destination"]
122122
)
123+
124+
expressions.annotate_tables(
125+
state,
126+
locals_dict={},
127+
skims=None,
128+
model_settings=model_settings,
129+
trace_label=trace_label,
130+
)

activitysim/abm/models/atwork_subtour_frequency.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ class AtworkSubtourFrequencySettings(LogitComponentSettings, extra="forbid"):
3737
Settings for the `atwork_subtour_frequency` component.
3838
"""
3939

40-
preprocessor: PreprocessorSettings | None = None
41-
"""Setting for the preprocessor."""
40+
# no additional fields are required for this component
41+
pass
4242

4343

4444
@workflow.step
@@ -92,15 +92,15 @@ def atwork_subtour_frequency(
9292
nest_spec = config.get_logit_model_settings(model_settings)
9393
constants = config.get_model_constants(model_settings)
9494

95-
# - preprocessor
96-
preprocessor_settings = model_settings.preprocessor
97-
if preprocessor_settings:
98-
expressions.assign_columns(
99-
state,
100-
df=work_tours,
101-
model_settings=preprocessor_settings,
102-
trace_label=trace_label,
103-
)
95+
# preprocess choosers
96+
expressions.annotate_preprocessors(
97+
state,
98+
df=work_tours,
99+
locals_dict=constants,
100+
skims=None,
101+
model_settings=model_settings,
102+
trace_label=trace_label,
103+
)
104104

105105
if estimator:
106106
estimator.write_spec(model_settings)
@@ -164,3 +164,11 @@ def atwork_subtour_frequency(
164164

165165
if trace_hh_id:
166166
state.tracing.trace_df(tours, label="atwork_subtour_frequency.tours")
167+
168+
expressions.annotate_tables(
169+
state,
170+
locals_dict=constants,
171+
skims=None,
172+
model_settings=model_settings,
173+
trace_label=trace_label,
174+
)

activitysim/abm/models/atwork_subtour_mode_choice.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -195,21 +195,18 @@ def atwork_subtour_mode_choice(
195195
)
196196
state.add_table("tours", tours)
197197

198-
# - annotate tours table
199-
if model_settings.annotate_tours:
200-
tours = state.get_dataframe("tours")
201-
expressions.assign_columns(
202-
state,
203-
df=tours,
204-
model_settings=model_settings.annotate_tours,
205-
trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"),
206-
)
207-
state.add_table("tours", tours)
208-
209198
if trace_hh_id:
210199
state.tracing.trace_df(
211200
tours[tours.tour_category == "atwork"],
212201
label=tracing.extend_trace_label(trace_label, mode_column_name),
213202
slicer="tour_id",
214203
index_label="tour_id",
215204
)
205+
206+
expressions.annotate_tables(
207+
state,
208+
locals_dict=constants,
209+
skims=skims,
210+
model_settings=model_settings,
211+
trace_label=trace_label,
212+
)

activitysim/abm/models/atwork_subtour_scheduling.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,11 @@ def atwork_subtour_scheduling(
141141
trace_label,
142142
"tour_map",
143143
)
144+
145+
expressions.annotate_tables(
146+
state,
147+
locals_dict=constants,
148+
skims=skims,
149+
model_settings=model_settings,
150+
trace_label=trace_label,
151+
)

activitysim/abm/models/auto_ownership.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable
1919
from activitysim.core.configuration.logit import LogitComponentSettings
2020

21-
from .util import annotate
22-
2321
logger = logging.getLogger(__name__)
2422

2523

@@ -28,8 +26,8 @@ class AutoOwnershipSettings(LogitComponentSettings, extra="forbid"):
2826
Settings for the `auto_ownership` component.
2927
"""
3028

31-
preprocessor: PreprocessorSettings | None = None
32-
annotate_households: PreprocessorSettings | None = None
29+
# no additional fields are required for this component
30+
pass
3331

3432

3533
@workflow.step
@@ -69,20 +67,14 @@ def auto_ownership_simulate(
6967

7068
logger.info("Running %s with %d households", trace_label, len(choosers))
7169

72-
# - preprocessor
73-
preprocessor_settings = model_settings.preprocessor
74-
if preprocessor_settings:
75-
locals_d = {}
76-
if constants is not None:
77-
locals_d.update(constants)
78-
79-
expressions.assign_columns(
80-
state,
81-
df=choosers,
82-
model_settings=preprocessor_settings,
83-
locals_dict=locals_d,
84-
trace_label=trace_label,
85-
)
70+
expressions.annotate_preprocessors(
71+
state,
72+
df=choosers,
73+
locals_dict=constants,
74+
skims=None,
75+
model_settings=model_settings,
76+
trace_label=trace_label,
77+
)
8678

8779
if estimator:
8880
estimator.write_model_settings(model_settings, model_settings_file_name)
@@ -120,8 +112,13 @@ def auto_ownership_simulate(
120112
"auto_ownership", households.auto_ownership, value_counts=True
121113
)
122114

123-
if model_settings.annotate_households:
124-
annotate.annotate_households(state, model_settings, trace_label)
125-
126115
if trace_hh_id:
127116
state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True)
117+
118+
expressions.annotate_tables(
119+
state,
120+
locals_dict=constants,
121+
skims=None,
122+
model_settings=model_settings,
123+
trace_label=trace_label,
124+
)

activitysim/abm/models/cdap.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,17 @@ class CdapSettings(PydanticReadable, extra="forbid"):
3636
JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv"
3737
JOINT_TOUR_USEFUL_COLUMNS: list[str] | None = None
3838
"""Columns to include from the persons table that will be need to calculate household joint tour utility."""
39-
annotate_persons: PreprocessorSettings | None = None
40-
annotate_households: PreprocessorSettings | None = None
4139
COEFFICIENTS: Path
4240
CONSTANTS: dict[str, Any] = {}
4341
compute_settings: ComputeSettings | None = None
4442

43+
preprocessor: PreprocessorSettings | None = None
44+
"""Preprocess choosers tables before running the model."""
45+
annotate_persons: PreprocessorSettings | None = None
46+
"""Postprocess persons table after model completion."""
47+
annotate_households: PreprocessorSettings | None = None
48+
"""Postprocess households table after model completion."""
49+
4550

4651
@workflow.step
4752
def cdap_simulate(
@@ -171,6 +176,16 @@ def cdap_simulate(
171176
index=True,
172177
)
173178

179+
# preprocess choosers
180+
expressions.annotate_preprocessors(
181+
state,
182+
df=persons_merged,
183+
locals_dict=constants,
184+
skims=None,
185+
model_settings=model_settings,
186+
trace_label=trace_label,
187+
)
188+
174189
if estimator:
175190
estimator.write_model_settings(model_settings, "cdap.yaml")
176191
estimator.write_spec(model_settings, tag="INDIV_AND_HHSIZE1_SPEC")
@@ -241,31 +256,25 @@ def cdap_simulate(
241256
cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False)
242257
choices = choices.astype(cap_cat_type)
243258
persons["cdap_activity"] = choices
244-
245-
expressions.assign_columns(
246-
state,
247-
df=persons,
248-
model_settings=model_settings.annotate_persons,
249-
trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"),
250-
)
251-
252259
state.add_table("persons", persons)
253260

254261
# - annotate households table
255262
if add_joint_tour_utility:
256263
hh_joint = hh_joint.reindex(households.index)
257264
households["has_joint_tour"] = hh_joint
258265

259-
expressions.assign_columns(
260-
state,
261-
df=households,
262-
model_settings=model_settings.annotate_households,
263-
trace_label=tracing.extend_trace_label(trace_label, "annotate_households"),
264-
)
265266
state.add_table("households", households)
266267

267268
tracing.print_summary("cdap_activity", persons.cdap_activity, value_counts=True)
268269
logger.info(
269270
"cdap crosstabs:\n%s"
270271
% pd.crosstab(persons.ptype, persons.cdap_activity, margins=True)
271272
)
273+
274+
expressions.annotate_tables(
275+
state,
276+
locals_dict=constants,
277+
skims=None,
278+
model_settings=model_settings,
279+
trace_label=trace_label,
280+
)

activitysim/abm/models/free_parking.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ class FreeParkingSettings(LogitComponentSettings, extra="forbid"):
2626
Settings for the `free_parking` component.
2727
"""
2828

29-
preprocessor: PreprocessorSettings | None = None
30-
"""Setting for the preprocessor."""
31-
3229
FREE_PARKING_ALT: int
3330
"""The code for free parking."""
3431

@@ -78,21 +75,6 @@ def free_parking(
7875

7976
constants = model_settings.CONSTANTS or {}
8077

81-
# - preprocessor
82-
preprocessor_settings = model_settings.preprocessor
83-
if preprocessor_settings:
84-
locals_d = {}
85-
if constants is not None:
86-
locals_d.update(constants)
87-
88-
expressions.assign_columns(
89-
state,
90-
df=choosers,
91-
model_settings=preprocessor_settings,
92-
locals_dict=locals_d,
93-
trace_label=trace_label,
94-
)
95-
9678
model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC)
9779
coefficients_df = state.filesystem.read_model_coefficients(model_settings)
9880
model_spec = simulate.eval_coefficients(
@@ -101,6 +83,15 @@ def free_parking(
10183

10284
nest_spec = config.get_logit_model_settings(model_settings)
10385

86+
expressions.annotate_preprocessors(
87+
state,
88+
df=choosers,
89+
locals_dict=constants,
90+
skims=None,
91+
model_settings=model_settings,
92+
trace_label=trace_label,
93+
)
94+
10495
if estimator:
10596
estimator.write_model_settings(model_settings, model_settings_file_name)
10697
estimator.write_spec(file_name=model_settings.SPEC)
@@ -144,3 +135,11 @@ def free_parking(
144135

145136
if state.settings.trace_hh_id:
146137
state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
138+
139+
expressions.annotate_tables(
140+
state,
141+
locals_dict=constants,
142+
skims=None,
143+
model_settings=model_settings,
144+
trace_label=trace_label,
145+
)

activitysim/abm/models/joint_tour_composition.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from activitysim.core.configuration.base import PreprocessorSettings
1919
from activitysim.core.configuration.logit import LogitComponentSettings
2020

21+
2122
logger = logging.getLogger(__name__)
2223

2324

@@ -36,8 +37,7 @@ class JointTourCompositionSettings(LogitComponentSettings, extra="forbid"):
3637
Settings for the `joint_tour_composition` component.
3738
"""
3839

39-
preprocessor: PreprocessorSettings | None = None
40-
"""Setting for the preprocessor."""
40+
pass
4141

4242

4343
@workflow.step
@@ -78,26 +78,6 @@ def joint_tour_composition(
7878
"Running joint_tour_composition with %d joint tours" % joint_tours.shape[0]
7979
)
8080

81-
# - run preprocessor
82-
preprocessor_settings = model_settings.preprocessor
83-
if preprocessor_settings:
84-
locals_dict = {
85-
"persons": persons,
86-
"hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x),
87-
}
88-
89-
expressions.assign_columns(
90-
state,
91-
df=households,
92-
model_settings=preprocessor_settings,
93-
locals_dict=locals_dict,
94-
trace_label=trace_label,
95-
)
96-
97-
joint_tours_merged = pd.merge(
98-
joint_tours, households, left_on="household_id", right_index=True, how="left"
99-
)
100-
10181
# - simple_simulate
10282
model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC)
10383
coefficients_df = state.filesystem.read_model_coefficients(model_settings)
@@ -108,6 +88,25 @@ def joint_tour_composition(
10888
nest_spec = config.get_logit_model_settings(model_settings)
10989
constants = config.get_model_constants(model_settings)
11090

91+
locals_dict = {
92+
"persons": persons,
93+
"hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x),
94+
}
95+
locals_dict.update(constants)
96+
97+
expressions.annotate_preprocessors(
98+
state,
99+
df=households,
100+
locals_dict=locals_dict,
101+
skims=None,
102+
model_settings=model_settings,
103+
trace_label=trace_label,
104+
)
105+
106+
joint_tours_merged = pd.merge(
107+
joint_tours, households, left_on="household_id", right_index=True, how="left"
108+
)
109+
111110
if estimator:
112111
estimator.write_spec(model_settings)
113112
estimator.write_model_settings(model_settings, model_settings_file_name)
@@ -156,3 +155,11 @@ def joint_tour_composition(
156155
label="joint_tour_composition.joint_tours",
157156
slicer="household_id",
158157
)
158+
159+
expressions.annotate_tables(
160+
state,
161+
locals_dict=locals_dict,
162+
skims=None,
163+
model_settings=model_settings,
164+
trace_label=trace_label,
165+
)

0 commit comments

Comments
 (0)