python pandas dataframe timestamp

Cannot compare tz-naive and tz-aware timestamps


I'm getting the following error: Cannot compare tz-naive and tz-aware timestamps

How can I convert the dates to fix the issue? The error appears at the end of the code below.

from datetime import datetime, timedelta
import pytz
# Define start and end dates
# NOTE: no tzinfo is passed here, so both datetimes are tz-naive
start = datetime(2024, 6, 1)
end = datetime(2024, 12, 10)
current_date = start
# List to store valid dates (one 'YYYY-MM-DD' string per day, start through end inclusive)
valid_dates = []

while current_date <= end:
    valid_dates.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=1)

# utc=True makes both columns tz-aware (UTC); errors='coerce' turns
# unparseable CloseDate values into NaT instead of raising
df['OpenDate'] = pd.to_datetime(df['OpenDate'], utc=True)
df['CloseDate'] = pd.to_datetime(df['CloseDate'], errors='coerce', utc=True)

# Built from the tz-naive start/end above with no tz argument, so the
# timestamps in this range are tz-naive as well
date_range = pd.date_range(start=start, end=end)

results = []

# For every row of df, emit one result dict per date in date_range on which
# the condition below holds
for index, row in df.iterrows():
    for current_date in date_range:
        # This is where the reported error is raised: row['OpenDate'] is
        # tz-aware (UTC) while current_date is tz-naive
        if row['OpenDate'] < current_date and (pd.isnull(row['CloseDate']) or 
row['CloseDate'] > current_date) and row['CloseType'] in [0, 1, 3, 7, 8]:
            result_row = {
                'CurrentDate': current_date,
                'User_Id': row['User_Id'],
                'OpenDate': row['OpenDate'],
                'CloseDate': row['CloseDate'],
                'CloseType': row['CloseType']
            }
            results.append(result_row)

Solution

  • Make start and end tz-aware (UTC) so they match the UTC-aware columns created with utc=True:

    start = datetime(2024, 6, 1, tzinfo=pytz.UTC)
    end = datetime(2024, 12, 10, tzinfo=pytz.UTC)
    

    And to create a tz-aware time range:

    date_range = pd.date_range(start=start, end=end, tz='UTC')
    

    Complete code:

    from datetime import datetime, timedelta
    import pytz
    import pandas as pd
    
    # Bounds of the reporting window, both timezone-aware (UTC)
    start = datetime(2024, 6, 1, tzinfo=pytz.UTC)
    end = datetime(2024, 12, 10, tzinfo=pytz.UTC)
    
    # One 'YYYY-MM-DD' string per calendar day, start through end inclusive
    day_count = (end - start).days + 1
    valid_dates = [
        (start + timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(day_count)
    ]
    
    # Parse both columns as UTC-aware timestamps; CloseDate values that
    # cannot be parsed are coerced to NaT
    df['OpenDate'] = pd.to_datetime(df['OpenDate'], utc=True)
    df['CloseDate'] = pd.to_datetime(df['CloseDate'], errors='coerce', utc=True)
    
    # The range is UTC-aware too, so its timestamps can be compared with
    # the columns above without a naive/aware mismatch
    date_range = pd.date_range(start=start, end=end, tz='UTC')
    
    allowed_close_types = [0, 1, 3, 7, 8]
    results = []
    for index, row in df.iterrows():
        for current_date in date_range:
            # Keep only dates strictly after the open date
            # (written as `not <` so a NaT OpenDate is skipped, as before)
            if not (row['OpenDate'] < current_date):
                continue
            # Keep the date if there is no close date or it lies strictly after this date
            still_open = pd.isnull(row['CloseDate']) or row['CloseDate'] > current_date
            if not still_open:
                continue
            if row['CloseType'] not in allowed_close_types:
                continue
            results.append({
                'CurrentDate': current_date,
                'User_Id': row['User_Id'],
                'OpenDate': row['OpenDate'],
                'CloseDate': row['CloseDate'],
                'CloseType': row['CloseType'],
            })
    

    Hope this helps.