pythonpandasdataframedatetimedistance

How do I update the coordinates based on most recent datetime?


How would you update the coordinates based on teh date_recorded. For example, there is no observation for 6/27 but if I can assume both cars are driving in the same direction and rate as the last observation, how would I update to calculate distance using the lat/lng for the most recent date_recorded?

import pandas as pd

# Car 1
car1_data = {
    'id': ['id0120169', 'id0386349', 'id1962532', 'id0335207', 'id0273508'],
    'date_recorded': ['7/1/2016 9:05', '6/29/2016 22:26', '6/29/2016 2:47', '6/26/2016 22:29', '6/25/2016 19:19'],
    'longitude': [-73.99017334, -73.97213745, -73.91963959, -73.98487091, -73.99116516],
    'latitude': [40.75667953, 40.75746155, 40.80377579, 40.76160812, 40.75000763],
    'rate': [50, 67, 43, 35, 46],
    'direction': ['N', 'E', 'E', 'S', 'W']}
car1_df = pd.DataFrame(car1_data)

# Car 2
car2_data = {
    'id': ['id2681896', 'id3308448', 'id2108525', 'id3952220', 'id2771348'],
    'date_recorded': ['6/30/2016 23:59', '6/29/2016 19:33', '6/28/2016 20:50', '6/26/2016 21:07', '6/25/2016 18:46'],
    'longitude': [-73.9881286621093, -73.9656295776367, -73.9716567993164, -73.9820938110351, -73.9761657714843],
    'latitude': [40.7320289611816, 40.7527198791503, 40.7945442199707, 40.7694053649902, 40.7602424621582],
    'rate': [81, 76, 59, 48, 55],
    'direction': ['S', 'E', 'N', 'W', 'W']}
car2_df = pd.DataFrame(car2_data)

Solution

  • What you need to do is calculate the movement of each car based on its last observed direction and rate. If we assume both cars move in its last observed direction for 24 hours from its last observation before 6/27.

    So given your data:

    import pandas as pd
    
    car1_data = {
        'id': ['id0120169', 'id0386349', 'id1962532', 'id0335207', 'id0273508'],
        'date_recorded': ['7/1/2016 9:05', '6/29/2016 22:26', '6/29/2016 2:47', '6/26/2016 22:29', '6/25/2016 19:19'],
        'longitude': [-73.99017334, -73.97213745, -73.91963959, -73.98487091, -73.99116516],
        'latitude': [40.75667953, 40.75746155, 40.80377579, 40.76160812, 40.75000763],
        'rate': [50, 67, 43, 35, 46],
        'direction': ['N', 'E', 'E', 'S', 'W']}
    car1_df = pd.DataFrame(car1_data)
    
    car2_data = {
        'id': ['id2681896', 'id3308448', 'id2108525', 'id3952220', 'id2771348'],
        'date_recorded': ['6/30/2016 23:59', '6/29/2016 19:33', '6/28/2016 20:50', '6/26/2016 21:07', '6/25/2016 18:46'],
        'longitude': [-73.9881286621093, -73.9656295776367, -73.9716567993164, -73.9820938110351, -73.9761657714843],
        'latitude': [40.7320289611816, 40.7527198791503, 40.7945442199707, 40.7694053649902, 40.7602424621582],
        'rate': [81, 76, 59, 48, 55],
        'direction': ['S', 'E', 'N', 'W', 'W']}
    car2_df = pd.DataFrame(car2_data)
    

    you should defined the following functions that update the position and predict the position relative the previous one:

    def update_position(row, hours=24):
        delta = row['rate'] * 0.001 * hours
        if row['direction'] == 'N':
            return row['latitude'] + delta, row['longitude']
        elif row['direction'] == 'S':
            return row['latitude'] - delta, row['longitude']
        elif row['direction'] == 'E':
            return row['latitude'], row['longitude'] + delta
        elif row['direction'] == 'W':
            return row['latitude'], row['longitude'] - delta
    
    def preprocess_df(df):
        df['date_recorded'] = pd.to_datetime(df['date_recorded'])
        df.sort_values('date_recorded', inplace=True)
    
    def predict_positions(df, missing_date):
        """
        Predict positions for a given date assuming constant movement from the last recorded position.
        """
        preprocess_df(df)
        df_before_missing = df[df['date_recorded'] < pd.to_datetime(missing_date)].copy()
        if df_before_missing.empty:
            return df
        last_observation = df_before_missing.iloc[-1]
        new_lat, new_long = update_position(last_observation)
        new_row = pd.DataFrame([{
            'id': last_observation['id'],
            'date_recorded': pd.to_datetime(missing_date),
            'longitude': new_long,
            'latitude': new_lat,
            'rate': last_observation['rate'],
            'direction': last_observation['direction']
        }])
        df_updated = pd.concat([df, new_row], ignore_index=True)
        return df_updated
    
    

    So, for the desired date:

    
    
    missing_date = '6/27/2016'
    car1_df_updated = predict_positions(car1_df, missing_date)
    car2_df_updated = predict_positions(car2_df, missing_date)
    

    you'll get for car1

              id       date_recorded  longitude   latitude  rate direction
    0  id0273508 2016-06-25 19:19:00 -73.991165  40.750008    46         W
    1  id0335207 2016-06-26 22:29:00 -73.984871  40.761608    35         S
    2  id1962532 2016-06-29 02:47:00 -73.919640  40.803776    43         E
    3  id0386349 2016-06-29 22:26:00 -73.972137  40.757462    67         E
    4  id0120169 2016-07-01 09:05:00 -73.990173  40.756680    50         N
    5  id0335207 2016-06-27 00:00:00 -73.984871  39.921608    35         S
    

    and for car 2

              id       date_recorded  longitude   latitude  rate direction
    0  id2771348 2016-06-25 18:46:00 -73.976166  40.760242    55         W
    1  id3952220 2016-06-26 21:07:00 -73.982094  40.769405    48         W
    2  id2108525 2016-06-28 20:50:00 -73.971657  40.794544    59         N
    3  id3308448 2016-06-29 19:33:00 -73.965630  40.752720    76         E
    4  id2681896 2016-06-30 23:59:00 -73.988129  40.732029    81         S
    5  id3952220 2016-06-27 00:00:00 -75.134094  40.769405    48         W