diff --git a/scripts/compare_skims.py b/scripts/compare_skims.py
new file mode 100644
index 00000000..82c95ca6
--- /dev/null
+++ b/scripts/compare_skims.py
@@ -0,0 +1,71 @@
+# %%
+import pandas as pd
+import openmatrix as omx
+from pathlib import Path
+
+import numpy as np
+
+network_fid_path = Path(
+    r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result"
+)
+# network_fid_path = Path(r"D:\TEMP\TM2.2.1.1-0.05")
+
+# %%
+
+
+def read_matrix_as_long_df(path: Path, run_name: str) -> pd.DataFrame:
+    """Read the AM drive-alone time skim and return it as a long-format frame."""
+    run = omx.open_file(path, "r")
+    am_time = np.array(run["AM_da_time"])
+    run.close()
+    index_labels = list(range(am_time.shape[0]))
+    return (
+        pd.DataFrame(am_time, index=index_labels, columns=index_labels)
+        .stack()
+        .rename(run_name)
+        .to_frame()
+    )
+
+
+# quick smoke test of the reader against a local run
+a = read_matrix_as_long_df(
+    r"D:\TEMP\TM2.2.1.1-New_network_rerun\TM2.2.1.1_new_taz\skim_matrices\highway\HWYSKMAM_taz.omx",
+    "test",
+)
+# %%
+all_skims = []
+for skim_matrix_path in network_fid_path.rglob("*AM_taz.omx"):
+    print(skim_matrix_path)
+    # the run name sits at a fixed depth in the directory tree
+    run_name = skim_matrix_path.parts[6]
+    all_skims.append(read_matrix_as_long_df(skim_matrix_path, run_name))
+
+all_skims = pd.concat(all_skims, axis=1)
+# %%
+all_skims.to_csv(
+    r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\output_summaries\skim_data\skims.csv"
+)
+# %%
+import geopandas as gpd
+from pathlib import Path
+import pandas as pd
+
+# %%
+output_paths_to_consolidate = Path(r"D:\TEMP\output_summaries")
+all_files = []
+for file in output_paths_to_consolidate.glob("*_roadway_network.geojson"):
+    # run name is the leading five characters of the file name
+    run_name = file.name[0:5]
+    print(run_name)
+    specific_run = gpd.read_file(file)
+    specific_run["run_number"] = run_name
+    all_files.append(specific_run)
+# %%
+all_files = pd.concat(all_files)
+# %%
+all_files.to_file(output_paths_to_consolidate / "all_runs_concat.gdb")
+
+# %%
+all_files.drop(columns="geometry").to_csv(output_paths_to_consolidate / "data.csv")
+# %%
+to_be_shape = all_files[["geometry", "model_link_id"]].drop_duplicates()
+print("outputting")
+to_be_shape.to_file(output_paths_to_consolidate / "geom_package")
diff --git a/scripts/compile_model_runs.py b/scripts/compile_model_runs.py
new file mode 100644
index 00000000..553a3656
--- /dev/null
+++ b/scripts/compile_model_runs.py
@@ -0,0 +1,238 @@
+# %%
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from tqdm import tqdm
+from shapely.geometry import LineString
+
+input_dir = Path(
+    r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result"
+)
+output_dir = input_dir / "consolidated_3"
+
+
+# in_file = next(input_dir.rglob('emme_links.shp'))
+# print("reading", in_file)
+# input2 = gpd.read_file(in_file, engine="pyogrio", use_arrow=True)
+# #%%
+# print("writing")
+# input[["#link_id", "geometry"]].to_file(output_dir / "test_geom.geojson")
+
+scenarios_to_consolidate = (11, 12, 13, 14, 15)
+runs_to_consolidate = (3, 4)
+# %%
+
+
+def read_file_and_tag(
+    path: Path,
+    columns_to_filter=(
+        "@ft",
+        "VOLAU",
+        "@capacity",
+        "run_number",
+        "scenario_number",
+        "#link_id",
+        "geometry",
+    ),
+) -> pd.DataFrame:
+    """Read an Emme links shapefile and tag it with its run and scenario.
+
+    Returns None for runs or scenarios outside the consolidation sets.
+    """
+    scenario = path.parent.stem
+    scenario_number = int(scenario.split("_")[-1])
+    if scenario_number not in scenarios_to_consolidate:
+        return None
+
+    run = path.parent.parent.stem
+    run_number = int(run.split("_")[-1])
+    if run_number not in runs_to_consolidate:
+        return None
+
+    return_gdf = gpd.read_file(path, engine="pyogrio")
+
+    return_gdf["scenario"] = scenario
+    return_gdf["scenario_number"] = scenario_number
+    return_gdf["run"] = run
+    return_gdf["run_number"] = run_number
+
+    if "VOLAU" not in return_gdf.columns:
+        print(return_gdf.columns)
+        print("... No VOLAU, filling with zero")
+        return_gdf["VOLAU"] = 0
+
+    return_gdf = return_gdf[list(columns_to_filter)]
+
+    # assert return_gdf["#link_id"].is_unique
+
+    return return_gdf
+
+
+def get_linestring_direction(linestring: LineString) -> str:
+    """Classify a link by the dominant compass direction of its endpoints."""
+    if not isinstance(linestring, LineString) or len(linestring.coords) < 2:
+        raise ValueError("Input must be a LineString with at least two coordinates")
+
+    start_point = linestring.coords[0]
+    end_point = linestring.coords[-1]
+
+    delta_x = end_point[0] - start_point[0]
+    delta_y = end_point[1] - start_point[1]
+
+    if abs(delta_x) > abs(delta_y):
+        if delta_x > 0:
+            return "East"
+        else:
+            return "West"
+    else:
+        if delta_y > 0:
+            return "North"
+        else:
+            return "South"
+
+
+# %%
+
+print("Reading Links...", end="")
+all_links = []
+for file in tqdm(input_dir.rglob("run_*/Scenario_*/emme_links.shp")):
+    print(file)
+    all_links.append(read_file_and_tag(file))
+# pd.concat silently drops the None entries for skipped runs/scenarios
+links_table = pd.concat(all_links)
+
+print("done")
+# %%
+scen_map = {11: "EA", 12: "AM", 13: "MD", 14: "PM", 15: "EV"}
+
+
+def get_return_first_geom(row):
+    # return the first non-missing value among the per-run geometry columns
+    geom_columns = [col for col in row.index if "geometry" in col]
+    return [row[col] for col in geom_columns if pd.notna(row[col])][0]
+
+
+def combine_tables(dfs, columns_same):
+    return_frame = dfs[0][columns_same]
+
+    for df in dfs:
+        run_number = df["run_number"].iloc[0]
+
+        scen_number = df["scenario_number"].iloc[0]
+        scen_number = scen_map[scen_number]
+        df["saturation"] = df["VOLAU"] / df["@capacity"]
+
+        df = df[
+            ["#link_id", "@capacity", "VOLAU", "saturation", "geometry", "@ft"]
+        ].rename(
+            columns={
+                "@capacity": f"capacity_run{run_number}_scen{scen_number}",
+                "VOLAU": f"@volau_run{run_number}_scen{scen_number}",
+                "saturation": f"@saturation_run{run_number}_scen{scen_number}",
+                "geometry": f"geometry_run{run_number}_scen{scen_number}",
+                "@ft": f"ft_run{run_number}_scen{scen_number}",
+            }
+        )
+        # outer join keeps link_ids that appear in only some of the runs
+        return_frame = return_frame.merge(
+            df, how="outer", on="#link_id", validate="1:1"
+        )
+        # collapse the per-run geometry columns into the single main geometry
+        geometry = return_frame.apply(get_return_first_geom, axis=1)
+        return_frame = return_frame.drop(
+            columns=[col for col in return_frame.columns if "geometry_" in col]
+        )
+        return_frame["geometry"] = geometry
+
+    return return_frame
+
+
+all_links_no_none = [
+    links
+    for links in all_links
+    if (links is not None) and (links["#link_id"].is_unique)
+]
+links_wide_table = combine_tables(all_links_no_none, ["#link_id", "geometry"])
+
+links_wide_table["direction"] = links_wide_table["geometry"].apply(
+    get_linestring_direction
+)
+# %%
+ft_cols = [col for col in links_wide_table.columns if "ft_" in col]
+
+links_wide_table["ft"] = links_wide_table[ft_cols].max(axis=1)
+links_wide_table = links_wide_table.drop(columns=ft_cols)
+
+# %%
+links_wide_table.to_file(
+    Path(
+        r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\output_summaries\all_links_data"
+    )
+    / "all_data_wide.geojson"
+)


+# %%
+# iterations needed to converge, keyed by (run_number, scenario_number)
+num_iter = {(3, 11): 3, (3, 12): 10, (3, 13): 10, (3, 14): 19, (3, 15): 4, (4, 12): 20}
+# %%
+all_links_no_none = [
+    links for links in all_links if (links is not None)
+]  # and (links["#link_id"].is_unique)]
+for df in all_links_no_none:
+    df["saturation"] = df["VOLAU"] / df["@capacity"]
+# share of ft 6 links that are over capacity, per (run, scenario)
+ft6_sat = [
+    (
+        link["run_number"].iloc[0],
link["scenario_number"].iloc[0], + (link.loc[link["@ft"] == 6, "saturation"] > 1).mean(), + ) + for link in all_links_no_none +] + +y = [val for val in num_iter.values()] +x = [x[-1] for x in ft6_sat] +col = [val[0] for val in num_iter.keys()] + +# %% +import matplotlib.pyplot as plt + +plt.scatter(x, y, c=col) + +# Calculate the trendline +z = np.polyfit(x, y, 1) +p = np.poly1d(z) + +# Plot the trendline +plt.plot(x, p(x), color="red") + +plt.xlabel("proportion of ft 6 with saturation > 1") +plt.ylabel("number of iterations to solve") +plt.title("Number of iterations to solve (relative gap = 0.05)") +plt.show() +# %% +import matplotlib.pyplot as plt + +data = [links_wide_table[col] for col in links_wide_table.iloc[:, 2:].columns] + +fig = plt.boxplot(data) +fig.show() + +# -------------------------------------------------------------------------- +# %% +links_table["direction"] = links_table["geometry"].apply(get_linestring_direction) +# %% +links_table.to_file(output_dir / "all_data.geojson", index=False) + + +# %% +def get_link_counts(df: pd.DataFrame): + ret_val = df.value_counts("@ft").sort_index().to_frame().T + total = ret_val.sum(axis=1) + total_minus_8 = total - ret_val[8.0].iloc[0] + ret_val["total"] = total + ret_val["total_minus_8"] = total_minus_8 + + ret_val["run_number"] = df["run_number"].iloc[0] + ret_val["scenario_number"] = df["scenario_number"].iloc[0] + return ret_val + + +pd.concat([get_link_counts(df) for df in all_links]).sort_values( + by=["run_number", "scenario_number"] +) diff --git a/tm2py/components/network/create_tod_scenarios.py b/tm2py/components/network/create_tod_scenarios.py index e9777d14..1df6284e 100644 --- a/tm2py/components/network/create_tod_scenarios.py +++ b/tm2py/components/network/create_tod_scenarios.py @@ -46,7 +46,7 @@ def run(self): # emme_app = self._emme_manager.project(project_path) # self._emme_manager.init_modeller(emme_app) with self._setup(): - # self._create_highway_scenarios() + self._create_highway_scenarios() self._create_transit_scenarios() @_context @@ -101,7 +101,7 @@ def _create_highway_scenarios(self): { "scenarios": 1 + n_time_periods, "full_matrices": 9999, - "extra_attribute_values": 60000000, + "extra_attribute_values": 100000000, } ) # create VDFs & set cross-reference function parameters @@ -628,7 +628,7 @@ def _set_capclass(network): area_type = link["@area_type"] if area_type < 0: link["@capclass"] = -1 - elif (link["@ft"] == 99) & (link["@assignable"] == 1): + elif link["@ft"] == 99: link["@capclass"] = 10 * area_type + 7 else: link["@capclass"] = 10 * area_type + link["@ft"]