import geopandas as gpd
import pandas as pd

from segment_speed_utils.project_vars import RT_SCHED_GCS, GTFS_DATA_DICT
# Identifier columns for one route-direction-stop row in the
# digest tables; shared by the subsetting/grouping helpers below.
route_dir_stop_cols = [
    "schedule_gtfs_dataset_key", "name", "time_period",
    "route_id", "direction_id",
    "stop_pair", "stop_pair_name",
]
# All operator names present in the route-segment speeds time-series.
# NOTE: the original chained assignment (`operator_list = df = ...`)
# also bound a throwaway module-level `df`; that alias is dropped here
# since nothing should rely on a module-global named `df`.
operator_list = pd.read_parquet(
    f"{RT_SCHED_GCS}{GTFS_DATA_DICT.digest_tables.route_segment_speeds}.parquet",
    columns=["name"],
).name.unique()
def route_segment_speeds_ts(
    file: str = GTFS_DATA_DICT.digest_tables.route_segment_speeds,
    **kwargs
) -> gpd.GeoDataFrame:
    """
    Read the concatenated time-series route-segment speeds parquet.

    All keyword arguments are forwarded unchanged to ``gpd.read_parquet``,
    so callers control any row/column subsetting themselves, e.g.::

        route_segment_speeds_ts(
            filters=[[("time_period", "==", "all_day")]],
            columns=route_dir_stop_cols + ["service_date", "p50_mph", "geometry"],
        )

    Parameters
    ----------
    file : str
        Catalog name (no extension) of the parquet within RT_SCHED_GCS.
    **kwargs
        Passed through to ``gpd.read_parquet`` (``filters``, ``columns``, ...).

    Returns
    -------
    gpd.GeoDataFrame
        Whatever subset of the parquet the kwargs select.
    """
    df = gpd.read_parquet(
        f"{RT_SCHED_GCS}{file}.parquet",
        **kwargs
    )

    return df
def count_time_series_values_by_route_direction_stop(
    df: gpd.GeoDataFrame,
    group_cols: list
) -> pd.DataFrame:
    """
    For each group (typically a route-direction-stop), count how many
    segment variations are available across our time-series.

    Parameters
    ----------
    df : gpd.GeoDataFrame
        Must contain ``group_cols`` plus "p50_mph", "service_date",
        and "geometry" columns.
    group_cols : list
        Columns to group by (e.g. ``route_dir_stop_cols``).

    Returns
    -------
    pd.DataFrame
        One row per group with:
        - n_speed_values: count of non-null p50_mph values
        - n_dates: number of distinct service_date values
        - n_geometry: number of distinct geometry values
    """
    # group_keys=False is a no-op for .agg (it only affects .apply);
    # kept to mirror the original call exactly.
    df2 = (
        df
        .groupby(group_cols, group_keys=False)
        .agg({
            "p50_mph": "count",
            "service_date": "nunique",
            "geometry": "nunique",
        })
        .reset_index()
        .rename(columns={
            "p50_mph": "n_speed_values",
            "service_date": "n_dates",
            "geometry": "n_geometry",
        })
    )

    return df2