Currently I'm running this below code for single record but I want run this code for all the cells in the column. Please help on this request.
from datetime import datetime
import xlrd
Date= 44774.45833333
New_date = datetime (*xlrd.xldate_as_tuple(excel_date,0))
print(New_date)
#rsult is 2022-08-01 11:00:00
Now I have a column called RETA where I want to apply this code to all the cells in this column and change the numeric format to datetime format.
sample data:
Theater | RETA TPM SLACycletime
----------------------------------------------------------
US | 2022-09-22 15:33:00 | Invalid Data
US | 44774.45833 | 558:19:30
US | 2022-09-2022 18:03:00 | 111:44:26
US | | 15:44:26
US | 1/8/2022 10:00:00 AM | Invalid Data
As you want a datetime, use xlrd.xldate_as_datetime
directly.
The easy (but non-vectorized) answer is:
df = pd.DataFrame({'RETA': [None, 0, 1, 10, 61, 10000, 44774.45833333, 44774.5]})
df['date'] = df['RETA'].apply(lambda date: xlrd.xldate_as_datetime(date, 0)
if pd.notna(date) else date)
Output:
RETA date
0 NaN NaT
1 0.000000 1899-12-31 00:00:00
2 1.000000 1900-01-01 00:00:00
3 10.000000 1900-01-10 00:00:00
4 60.000000 1900-02-28 00:00:00
5 61.000000 1900-03-01 00:00:00
6 10000.000000 1927-05-18 00:00:00
7 44774.458333 2022-08-01 11:00:00
8 44774.510000 2022-08-01 12:14:24
For a vectorized version, let's rewite xlrd.xldate_as_datetime
's code to work on a Series:
def vectorized_xldate(date, mode=0):
epoch = pd.Timestamp('1970-01-01')
if mode:
epoch -= pd.Timestamp('1904-01-01')
else:
epoch = (pd.Series(pd.Timestamp('1899-12-31'), index=date.index)
.where(date<60, pd.Timestamp('1899-12-30'))
.rsub(epoch)
)
return (pd.to_datetime(date.mul(86400).round(3), unit='s')
.sub(epoch).round('us')
)
df['date_vectorized'] = vectorized_xldate(df['RETA'], mode=0)
# or
# df['date_vectorized'] = vectorized_xldate(df['RETA'])
Example with the two modes:
# mode=0
RETA date_mode0 date_mode0_vectorized
0 NaN NaT NaT
1 0.000000 1899-12-31 00:00:00.000 1899-12-31 00:00:00.000
2 1.000000 1900-01-01 00:00:00.000 1900-01-01 00:00:00.000
3 10.000000 1900-01-10 00:00:00.000 1900-01-10 00:00:00.000
4 59.000000 1900-02-28 00:00:00.000 1900-02-28 00:00:00.000
5 59.999999 1900-02-28 23:59:59.914 1900-02-28 23:59:59.914
6 60.000000 1900-02-28 00:00:00.000 1900-02-28 00:00:00.000
7 61.000000 1900-03-01 00:00:00.000 1900-03-01 00:00:00.000
8 10000.000000 1927-05-18 00:00:00.000 1927-05-18 00:00:00.000
9 44774.458333 2022-08-01 11:00:00.000 2022-08-01 11:00:00.000
10 44774.123457 2022-08-01 02:57:46.667 2022-08-01 02:57:46.667
# mode=1
RETA date_mode1 date_mode1_vectorized
0 NaN NaT NaT
1 0.000000 1904-01-01 00:00:00.000 1904-01-01 00:00:00.000
2 1.000000 1904-01-02 00:00:00.000 1904-01-02 00:00:00.000
3 10.000000 1904-01-11 00:00:00.000 1904-01-11 00:00:00.000
4 59.000000 1904-02-29 00:00:00.000 1904-02-29 00:00:00.000
5 59.999999 1904-02-29 23:59:59.914 1904-02-29 23:59:59.914
6 60.000000 1904-03-01 00:00:00.000 1904-03-01 00:00:00.000
7 61.000000 1904-03-02 00:00:00.000 1904-03-02 00:00:00.000
8 10000.000000 1931-05-19 00:00:00.000 1931-05-19 00:00:00.000
9 44774.458333 2026-08-02 11:00:00.000 2026-08-02 11:00:00.000
10 44774.123457 2026-08-02 02:57:46.667 2026-08-02 02:57:46.667
def vectorized_xldate(date, mode=0, errors='raise'):
epoch = pd.Timestamp('1970-01-01')
if errors == 'coerce':
dt = pd.to_datetime(date, format='mixed', errors='coerce')
date = pd.to_numeric(date, errors='coerce')
else:
dt = float('nan')
if mode:
epoch -= pd.Timestamp('1904-01-01')
else:
epoch = (pd.Series(pd.Timestamp('1899-12-31'), index=date.index)
.where(date<60, pd.Timestamp('1899-12-30'))
.rsub(epoch)
)
return (pd.to_datetime(date.mul(86400).round(3), unit='s')
.sub(epoch).round('us')
.fillna(dt)
)
df['date'] = vectorized_xldate(df['RETA'], errors='coerce').dt.round('S')
Output:
Theater RETA TPM SLACycletime date
0 US 2022-09-22 15:33:00 Invalid Data 2022-09-22 15:33:00
1 US 44774.45833 558:19:30 2022-08-01 11:00:00
2 US 2022-09-2022 18:03:00 111:44:26 NaT
3 US 15:44:26 None 2023-08-04 15:44:26
4 US 1/8/2022 10:00:00 AM Invalid Data 2022-01-08 10:00:00