pythonpandasdatasetruntime-error

How to solve this runtime error i am getting in python pandas?


MY REQUIREMENT IS TO HAVE THE OUTPUT of the Userdefined Functions open_duration(open_time): OR res_duration(res_time): in the new COLUMN ['Open Resolve Duration'] in the selected_columns_df

import pandas as pdpd.options.mode.copy_on_write = True

    def calculate_totduration(sr_creation, sr_resolved):
    start_date = pd.to_datetime(sr_creation)    end_date = pd.to_datetime(sr_resolved)
    total_minutes = difference.total_seconds()/60    return total_minutes

    def open_duration(open_time):
    print(type(open_time))    #sr_open_time = open_time.astype(int)
    sr_open_time = pd.to_numeric(open_time)    
    if sr_open_time <=240:
        return('Open < 4 hours')    
    elif sr_open_time <=480:
        return('Open from 4 hrs to 8 hrs')    
    elif sr_open_time <=720:
        return('Open from 8 hrs to 12 hrs')    
    elif sr_open_time <=1440:
        return('Open 12 hrs to 24 hrs')    
    elif sr_open_time <=2880:
        return('Open 24 to 48 hrs')
    else:        return('OPEN > 48 hrs')
    
def res_duration(res_time):
    sr_res_time = pd.to_numeric(res_time)  
if sr_res_time <= 240:
        return('Within 4 hours')    
elif sr_res_time <=480:
        return('Bet 4 hrs to 8 hrs')    
elif sr_res_time <=720:
        return('8 hrs to 12 hrs')    
elif sr_res_time <=1440:
        return('12 hrs to 24 hrs')    
else:
        return('> 24 hrs')

df = pd.read_csv('C:\\Users\\XXXXXX\\Desktop\\PythonCodes\\Testing10.csv',sep=',',skiprows=0,low_memory=False,encoding='utf-8')

filtered_df = df[(df['Region/Circle']=='IND')]

selected_columns_df = filtered_df[['Product','Customer Name','SI Number','SI Name','SR Number','SR Status','Region/Circle','Case Type','Source','SR Creation Time','Resolved Time','Total Duration SUM','Case Type','Ser Segment']]

selected_columns_df['Total Minutes'] = selected_columns_df.apply(lambda row:calculate_totduration(row['SR Creation Time'],row['Resolved Time']),axis=1)

if selected_columns_df['SR Status'].str.lower == 'open' or selected_columns_df['SR Status'].str.lower == 're-open':
    selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row:open_duration(calculate_totduration(row['SR Creation Time'],pd.to_datetime('today'))),axis=1)
else:
    selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))    
    #selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(lambda row :calculate_totduration(row['SR Creation Time'],row['Resolved Time'])))

selected_columns_df.to_csv('MYoutfile4.csv')
print('FILE WRITTEN')
print(pd.to_datetime('today'))

But I am getting the following error:

File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py", line 3805, in get_loc return self._engine.get_loc(casted_key) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc File "index.pyx", line 175, in pandas._libs.index.IndexEngine.get_loc File "pandas\_libs\index_class_helper.pxi", line 70, in pandas._libs.index.Int64Engine._check_type KeyError: 'Total Minutes'

The above exception was the direct cause of the following exception:

Traceback (most recent call last): File "c:\Users\xXXXXXx\Desktop\PythonCodes\DATASETS.py", line 71, in selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes'])) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\frame.py", line 10374, in apply return op.apply().finalize(self, method="apply") ^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-your textpackages\pandas\core\apply.py", line 916, in apply return self.apply_standard() ^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\apply.py", line 1063, in apply_standard results, res_index = self.apply_series_generator() ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\apply.py", line 1081, in apply_series_generator results[i] = self.func(v, *self.args, **self.kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "c:\Users\xXXXXXx\Desktop\PythonCodes\DATASETS.py", line 71, in selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))

         ~~~^^^^^^^^^^^^^^^^^ 

File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py", line 1121, in getitem return self._get_value(key) ^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py", line 1237, in _get_value loc = self.index.get_loc(label) ^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py", line 3812, in get_loc raise KeyError(key) from err KeyError: 'Total Minutes'


Solution

  • You forgot to add axis=1.

    Replace:

    selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))   
    

    With:

    selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']), axis=1)