In [1]:
import os
import pandas as pd
import geopandas as gpd
import gtfs_kit as gk

from glob import glob
from math import radians, sin, cos, sqrt, atan2
In [2]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """
    Return the great-circle distance in metres between two points,
    computed with the Haversine formula.

    Parameters:
    lat1, lon1: latitude and longitude of the first point, in decimal degrees
    lat2, lon2: latitude and longitude of the second point, in decimal degrees

    Returns:
    Distance in metres on a sphere of radius 6,371,000 m
    """
    R = 6371000  # Earth radius in metres

    # Convert the coordinates to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c
def calculate_average_speed(df, lat_col='latitude', lon_col='longitude', time_col='timestamp'):
    """
    Compute the average speed between consecutive points.

    Rows are processed in their existing order; sort the frame by time
    before calling if it is not already ordered.

    Parameters:
    df: DataFrame with one position per row
    lat_col: name of the latitude column (decimal degrees)
    lon_col: name of the longitude column (decimal degrees)
    time_col: name of the timestamp column (datetime, or any value that
        pd.to_datetime can parse)

    Returns:
    Copy of df with three added columns:
    'time_diff' (seconds since the previous row, NaN on the first row),
    'distance' (metres from the previous row, 0.0 on the first row) and
    'speed_kmh' (distance / time_diff converted to km/h).
    """
    # Work on a copy so the caller's frame is left untouched
    df = df.copy()

    # Convert the timestamp to datetime if it is not one already
    if not pd.api.types.is_datetime64_any_dtype(df[time_col]):
        df[time_col] = pd.to_datetime(df[time_col])

    # Time differences in seconds
    df['time_diff'] = df[time_col].diff().dt.total_seconds()

    # Distances between consecutive points. Rows are paired purely by
    # position so the result is correct whatever the index labels are
    # (e.g. a filtered frame whose index is not 0..n-1).
    lats = df[lat_col].tolist()
    lons = df[lon_col].tolist()
    distances = [0.0] * len(df)
    for i in range(1, len(df)):
        distances[i] = calculate_distance(lats[i - 1], lons[i - 1], lats[i], lons[i])
    df['distance'] = pd.Series(distances, index=df.index, dtype='float64')

    # Average speed: m/s -> km/h. Rows with a zero time_diff are not
    # special-cased and end up non-finite.
    df['speed_kmh'] = (df['distance'] / df['time_diff']) * 3.6

    return df
In [3]:
# Local data root under the user's home directory, and the sub-folder that
# holds the 'vehicles*.parquet' files read further down
home_dir = os.path.expanduser('~')
ilab_path = os.path.join(home_dir, 'ILAB_DATA')
rt_path = os.path.join(ilab_path, 'GTFS', 'RT')
In [4]:
# Read the static GTFS feed from the Roma Mobilità URL and keep handles on
# the tables used below
path = 'https://romamobilita.it/sites/default/files/rome_static_gtfs.zip'
feed = gk.read_feed(path, dist_units='km')
routes, trips, stops = feed.routes, feed.trips, feed.stops

# Stop times enriched with stop name/coordinates and the route of each trip.
# The keys spelled out here are the only columns each pair of tables shares.
stop_times = (
    feed.stop_times[['stop_id', 'trip_id', 'stop_sequence']]
    .merge(stops[['stop_id', 'stop_name', 'stop_lat', 'stop_lon']], on='stop_id')
    .merge(trips[['trip_id', 'route_id']], on='trip_id')
    .merge(routes[['route_id', 'route_short_name']], on='route_id')
)
In [5]:
# Load every 'vehicles*.parquet' file found in rt_path, in file-name order
file_list = sorted(glob(os.path.join(rt_path, 'vehicles*.parquet')))
if not file_list:
    # pd.concat([]) would otherwise fail with the unhelpful
    # "No objects to concatenate"
    raise FileNotFoundError(f"No 'vehicles*.parquet' files found in {rt_path}")

df_list = []
for file in file_list:
    t = pd.read_parquet(file)
    # 'timestamp' is parsed as a number of seconds since the Unix epoch
    t['timestamp'] = pd.to_datetime(t['timestamp'], unit='s')
    if not isinstance(t, gpd.GeoDataFrame):
        # Attach point geometries (x = longitude, y = latitude) in EPSG:4326
        t = gpd.GeoDataFrame(
            t,
            geometry=gpd.points_from_xy(t['vehicle_lon'], t['vehicle_lat']),
            crs=4326,
        )
    df_list.append(t)

# Single frame with all records and a fresh 0..n-1 index
vehicles_df = pd.concat(df_list, ignore_index=True)
In [6]:
# Take the first vehicle id that appears in the data and isolate its records
vehicle_sel = vehicles_df['vehicle_id'].unique()[0]
vehicle = vehicles_df.loc[vehicles_df['vehicle_id'] == vehicle_sel].copy()
# Direction is handled as a string from here on (it is compared with '1' below)
vehicle['trip_direction_id'] = vehicle['trip_direction_id'].astype(str)

# Stops of the route found on the vehicle's first record
route = vehicle['route_id'].values[0]
stop_cols = ['stop_id', 'stop_lat', 'stop_lon', 'stop_sequence']
stops_sel = stop_times.loc[stop_times['route_id'] == route, stop_cols].drop_duplicates()
stops_sel = gpd.GeoDataFrame(
    stops_sel,
    geometry=gpd.points_from_xy(stops_sel['stop_lon'], stops_sel['stop_lat']),
    crs=4326,
)

# Interactive map: stops in red, vehicle positions coloured by direction
m = stops_sel.explore(color='red', style_kwds={'radius': 10})
m = vehicle.explore(m=m, column='trip_direction_id', name='vehicle', style_kwds={'radius': 10})
m
Out[6]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [7]:
# Records of the selected vehicle for direction '1', oldest first, restricted
# to the columns of interest, de-duplicated and re-indexed from 0
keep_cols = ['vehicle_lat', 'vehicle_lon', 'vehicle_odo', 'stop_id', 'occupancy_status',
             'current_stop_sequence', 'current_status', 'timestamp']
df = (
    vehicle.loc[vehicle['trip_direction_id'] == '1']
    .sort_values('timestamp')
    .loc[:, keep_cols]
    .drop_duplicates()
    .reset_index(drop=True)
)
In [8]:
# Delegate to calculate_average_speed rather than repeating its body here.
# The helper reads 'latitude'/'longitude', so the vehicle columns are renamed
# for the call and restored afterwards. Adds 'time_diff' (s), 'distance' (m)
# and 'speed_kmh' to df.
# NOTE(review): consecutive rows can straddle a long gap where the odometer
# restarts; the speed computed across such a gap is probably not meaningful.
# Consider splitting the records into separate runs first.
df = (
    calculate_average_speed(
        df.rename(columns={'vehicle_lat': 'latitude', 'vehicle_lon': 'longitude'})
    )
    .rename(columns={'latitude': 'vehicle_lat', 'longitude': 'vehicle_lon'})
)
In [9]:
# Preview the first rows, including the time_diff, distance and speed_kmh columns
df.head()
Out[9]:
vehicle_lat vehicle_lon vehicle_odo stop_id occupancy_status current_stop_sequence current_status timestamp time_diff distance speed_kmh
0 41.871178 12.540133 352.0 74025 1.0 3.0 2 2024-12-13 15:36:46 NaN 0.000000 NaN
1 41.875088 12.541280 591.0 74027 1.0 5.0 2 2024-12-13 15:38:54 128.0 445.018671 12.516150
2 41.878326 12.541835 791.0 74028 1.0 6.0 2 2024-12-13 15:40:30 96.0 363.044691 13.614176
3 41.878963 12.541917 830.0 74028 1.0 6.0 2 2024-12-13 15:42:40 130.0 71.161921 1.970638
4 41.879604 12.542138 873.0 74028 1.0 6.0 2 2024-12-13 15:44:48 128.0 73.577995 2.069381
In [10]:
# Whole table without the raw coordinate columns
df.loc[:, ['vehicle_odo', 'stop_id', 'occupancy_status',
           'current_stop_sequence', 'current_status', 'timestamp',
           'time_diff', 'distance', 'speed_kmh']]
Out[10]:
vehicle_odo stop_id occupancy_status current_stop_sequence current_status timestamp time_diff distance speed_kmh
0 352.0 74025 1.0 3.0 2 2024-12-13 15:36:46 NaN 0.000000 NaN
1 591.0 74027 1.0 5.0 2 2024-12-13 15:38:54 128.0 445.018671 12.516150
2 791.0 74028 1.0 6.0 2 2024-12-13 15:40:30 96.0 363.044691 13.614176
3 830.0 74028 1.0 6.0 2 2024-12-13 15:42:40 130.0 71.161921 1.970638
4 873.0 74028 1.0 6.0 2 2024-12-13 15:44:48 128.0 73.577995 2.069381
5 1119.0 74029 1.0 7.0 2 2024-12-13 15:46:56 128.0 437.483203 12.304215
6 1325.0 74031 3.0 9.0 2 2024-12-13 15:50:40 224.0 386.422387 6.210360
7 1576.0 75134 3.0 10.0 2 2024-12-13 15:52:50 130.0 453.605218 12.561375
8 1655.0 71134 3.0 11.0 2 2024-12-13 15:54:58 128.0 156.981447 4.415103
9 2005.0 79286 3.0 13.0 2 2024-12-13 15:56:34 96.0 608.977028 22.836639
10 2321.0 74035 3.0 15.0 2 2024-12-13 15:58:42 128.0 571.070402 16.061355
11 2499.0 74036 3.0 16.0 2 2024-12-13 15:59:46 64.0 327.156679 18.402563
12 2532.0 74037 3.0 17.0 2 2024-12-13 16:02:58 192.0 60.446184 1.133366
13 2691.0 82023 3.0 18.0 2 2024-12-13 16:04:36 98.0 290.867321 10.684922
14 2697.0 82023 3.0 18.0 2 2024-12-13 16:06:44 128.0 10.013119 0.281619
15 10.0 81907 NaN 1.0 1 2024-12-13 16:54:54 2890.0 4591.335687 5.719311
16 10.0 81907 NaN 1.0 1 2024-12-13 16:57:04 130.0 0.579512 0.016048
17 10.0 81907 NaN 1.0 1 2024-12-13 16:58:40 96.0 0.157941 0.005923
18 10.0 81907 NaN 1.0 1 2024-12-13 17:01:20 160.0 0.157941 0.003554
19 12.0 81907 NaN 1.0 1 2024-12-13 17:02:56 96.0 1.862777 0.069854
20 117.0 74024 3.0 2.0 2 2024-12-13 17:04:32 96.0 168.341101 6.312791
21 163.0 74024 3.0 2.0 2 2024-12-13 17:06:42 130.0 87.403372 2.420401
22 233.0 74025 3.0 3.0 2 2024-12-13 17:08:18 96.0 126.969640 4.761361
23 763.0 74027 3.0 5.0 2 2024-12-13 17:11:30 192.0 949.520640 17.803512
24 790.0 74028 3.0 6.0 2 2024-12-13 17:12:34 64.0 54.884433 3.087249
25 896.0 74028 1.0 6.0 2 2024-12-13 17:15:16 162.0 201.471170 4.477137
26 1045.0 74029 1.0 7.0 2 2024-12-13 17:16:52 96.0 251.018586 9.413197
27 1275.0 74030 1.0 8.0 2 2024-12-13 17:19:00 128.0 415.804892 11.694513
28 1471.0 74031 1.0 9.0 2 2024-12-13 17:20:36 96.0 363.919242 13.646972
29 1695.0 71134 3.0 11.0 2 2024-12-13 17:23:16 160.0 411.079512 9.249289
30 2023.0 79286 3.0 13.0 2 2024-12-13 17:24:54 98.0 591.039862 21.711668
31 2376.0 74035 1.0 15.0 2 2024-12-13 17:27:02 128.0 626.642412 17.624318
32 2515.0 74037 3.0 17.0 2 2024-12-13 17:28:38 96.0 254.610776 9.547904
In [ ]: