MY REQUIREMENT IS TO HAVE THE OUTPUT of the Userdefined Functions open_duration(open_time): OR res_duration(res_time): in the new COLUMN ['Open Resolve Duration'] in the selected_columns_df
import pandas as pdpd.options.mode.copy_on_write = True
def calculate_totduration(sr_creation, sr_resolved):
start_date = pd.to_datetime(sr_creation) end_date = pd.to_datetime(sr_resolved)
total_minutes = difference.total_seconds()/60 return total_minutes
def open_duration(open_time):
print(type(open_time)) #sr_open_time = open_time.astype(int)
sr_open_time = pd.to_numeric(open_time)
if sr_open_time <=240:
return('Open < 4 hours')
elif sr_open_time <=480:
return('Open from 4 hrs to 8 hrs')
elif sr_open_time <=720:
return('Open from 8 hrs to 12 hrs')
elif sr_open_time <=1440:
return('Open 12 hrs to 24 hrs')
elif sr_open_time <=2880:
return('Open 24 to 48 hrs')
else: return('OPEN > 48 hrs')
def res_duration(res_time):
sr_res_time = pd.to_numeric(res_time)
if sr_res_time <= 240:
return('Within 4 hours')
elif sr_res_time <=480:
return('Bet 4 hrs to 8 hrs')
elif sr_res_time <=720:
return('8 hrs to 12 hrs')
elif sr_res_time <=1440:
return('12 hrs to 24 hrs')
else:
return('> 24 hrs')
df = pd.read_csv('C:\\Users\\XXXXXX\\Desktop\\PythonCodes\\Testing10.csv',sep=',',skiprows=0,low_memory=False,encoding='utf-8')
filtered_df = df[(df['Region/Circle']=='IND')]
selected_columns_df = filtered_df[['Product','Customer Name','SI Number','SI Name','SR Number','SR Status','Region/Circle','Case Type','Source','SR Creation Time','Resolved Time','Total Duration SUM','Case Type','Ser Segment']]
selected_columns_df['Total Minutes'] = selected_columns_df.apply(lambda row:calculate_totduration(row['SR Creation Time'],row['Resolved Time']),axis=1)
if selected_columns_df['SR Status'].str.lower == 'open' or selected_columns_df['SR Status'].str.lower == 're-open':
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row:open_duration(calculate_totduration(row['SR Creation Time'],pd.to_datetime('today'))),axis=1)
else:
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))
#selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(lambda row :calculate_totduration(row['SR Creation Time'],row['Resolved Time'])))
selected_columns_df.to_csv('MYoutfile4.csv')
print('FILE WRITTEN')
print(pd.to_datetime('today'))
But I am getting the following error:
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py", line 3805, in get_loc return self._engine.get_loc(casted_key) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc File "index.pyx", line 175, in pandas._libs.index.IndexEngine.get_loc File "pandas\_libs\index_class_helper.pxi", line 70, in pandas._libs.index.Int64Engine._check_type KeyError: 'Total Minutes'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "c:\Users\xXXXXXx\Desktop\PythonCodes\DATASETS.py", line 71, in
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\frame.py", line 10374, in apply
return op.apply().finalize(self, method="apply")
^^^^^^^^^^
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-your text
packages\pandas\core\apply.py", line 916, in apply
return self.apply_standard()
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\apply.py", line 1063, in apply_standard
results, res_index = self.apply_series_generator()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\apply.py", line 1081, in apply_series_generator
results[i] = self.func(v, *self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\xXXXXXx\Desktop\PythonCodes\DATASETS.py", line 71, in
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))
~~~^^^^^^^^^^^^^^^^^
File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py", line 1121, in getitem return self._get_value(key) ^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py", line 1237, in _get_value loc = self.index.get_loc(label) ^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xXXXXXx\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py", line 3812, in get_loc raise KeyError(key) from err KeyError: 'Total Minutes'
You forgot to add axis=1
.
Replace:
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']))
With:
selected_columns_df['Open Resolve Duration'] = selected_columns_df.apply(lambda row :res_duration(row['Total Minutes']), axis=1)