I have a .csv file where many different properties are measured with 10 Hz frequency. The first row with the timestamps looks as follows:
0 NaN
1 29/07/2021 11:00
2 00:00.1
3 00:00.2
4 00:00.3
5 00:00.4
6 00:00.5
7 00:00.6
8 00:00.7
9 00:00.8
10 00:00.9
11 29/07/2021 11:00
12 00:01.1
13 00:01.2
14 00:01.3
...
Name: TIMESTAMP, Length: 36601, dtype: object
I have never seen data being recorded like that. Is there a way to convert all the rows with given milliseconds to a datetime format as in row 1 but with ms and add ms to the rows 1, 11 and so on? So that I have:
0 NaN
1 29/07/2021 11:00:00.1
2 29/07/2021 11:00:00.2
3 29/07/2021 11:00:00.3
...
data = pd.read_csv('file.csv', skiprows=1, header=None, names=['timestamp'])
absolute_timestamps = []
base_time = None
for index, row in data.iterrows():
timestamp = row['timestamp']
if isinstance(timestamp, float):
continue
if '/' in timestamp:
base_time = datetime.strptime(timestamp, "%d/%m/%Y %H:%M")
current_time = base_time
else:
try:
seconds, milliseconds = map(float, timestamp.split(':'))
delta = timedelta(seconds=seconds, milliseconds=milliseconds*100)
current_time = base_time + delta
absolute_timestamps.append(current_time)
except ValueError:
continue
result_df = pd.DataFrame(absolute_timestamps, columns=['absolute_timestamp'])
print(result_df)
This is giving me an empty DataFrame.
MRE of the csv file:
TIMESTAMP,Ux,Uy,Uz,Ts,CO2,H2O,press,t_hmp,e_hmp,rh_hmp
,m/s,m/s,m/s,C,mg/m^3,g/m^3,kPa,C,kPa,percent
29/07/2021 11:00,0.241,-0.2965,0.258,25.99887,675.759,15.90253,98.67521,27.66419,2.129122,57.44878
00:00.1,0.3545,-0.34425,0.21125,25.87463,675.9955,15.9117,98.65817,27.66253,2.136629,57.65696
00:00.2,0.38175,-0.28125,0.2045,25.95746,675.7098,15.93659,98.65817,27.65421,2.137441,57.70692
00:00.3,0.3465,-0.22175,0.20025,26.05753,674.4702,15.98516,98.67521,27.65919,2.137509,57.69193
00:00.4,0.08875,-0.326,0.2305,26.0351,676.1616,15.89104,98.65817,27.67419,2.139629,57.69859
00:00.5,0.012,-0.4945,0.26625,26.00403,675.5728,15.90638,98.65817,27.6925,2.142476,57.71358
00:00.6,0.10225,-0.42525,0.2155,25.9816,670.6963,16.13573,98.67521,27.63588,2.132997,57.64863
00:00.7,0.06425,-0.39075,0.01075,26.0144,671.8867,16.07627,98.69327,27.66419,2.137578,57.67694
00:00.8,0.2435,-0.51875,0.1525,25.98849,668.4075,16.23631,98.65817,27.66419,2.136035,57.6353
00:00.9,0.22075,-0.61475,0.033,25.80215,674.759,15.92974,98.68423,27.62256,2.132384,57.67694
29/07/2021 11:00,0.28025,-0.7262501,-0.053,25.84531,677.9393,15.76822,98.67521,27.65587,2.137896,57.71358
00:01.1,0.27025,-0.7032501,-0.13675,25.83667,679.4952,15.69851,98.65817,27.64421,2.136132,57.70525
00:01.2,0.232,-0.694,-0.05075,25.83496,679.4978,15.70536,98.65817,27.70084,2.145746,57.77353
00:01.3,0.165,-0.6102501,-0.0565,25.86084,679.2551,15.71204,98.67521,27.68085,2.14386,57.79018
00:01.4,0.23975,-0.636,-0.0815,25.82632,679.4276,15.70096,98.64916,27.71249,2.147887,57.79185
00:01.5,0.27175,-0.591,-0.021,25.82806,679.7066,15.69168,98.65817,27.63921,2.137542,57.76021
00:01.6,0.188,-0.60025,-0.04875,25.84009,678.5519,15.75871,98.64916,27.70416,2.145296,57.75022
00:01.7,0.14625,-0.566,0.001,25.80905,680.5561,15.66352,98.67521,27.64587,2.139792,57.79851
00:01.8,0.1705,-0.67675,0.01,25.79874,680.9865,15.64016,98.65817,27.66253,2.141258,57.78186
00:01.9,0.162,-0.6135001,-0.08125,25.7901,681.2719,15.61947,98.69327,27.67252,2.142385,57.77853
Which I want to transform to:
TIMESTAMP,Ux,Uy,Uz,Ts,CO2,H2O,press,t_hmp,e_hmp,rh_hmp
,m/s,m/s,m/s,C,mg/m^3,g/m^3,kPa,C,kPa,percent
29/07/2021 11:00:00.0,0.241,-0.2965,0.258,25.99887,675.759,15.90253,98.67521,27.66419,2.129122,57.44878
29/07/2021 11:00:00.1,0.3545,-0.34425,0.21125,25.87463,675.9955,15.9117,98.65817,27.66253,2.136629,57.65696
29/07/2021 11:00:00.2,0.38175,-0.28125,0.2045,25.95746,675.7098,15.93659,98.65817,27.65421,2.137441,57.70692
29/07/2021 11:00:00.3,0.3465,-0.22175,0.20025,26.05753,674.4702,15.98516,98.67521,27.65919,2.137509,57.69193
29/07/2021 11:00:00.4,0.08875,-0.326,0.2305,26.0351,676.1616,15.89104,98.65817,27.67419,2.139629,57.69859
29/07/2021 11:00:00.5,0.012,-0.4945,0.26625,26.00403,675.5728,15.90638,98.65817,27.6925,2.142476,57.71358
29/07/2021 11:00:00.6,0.10225,-0.42525,0.2155,25.9816,670.6963,16.13573,98.67521,27.63588,2.132997,57.64863
29/07/2021 11:00:00.7,0.06425,-0.39075,0.01075,26.0144,671.8867,16.07627,98.69327,27.66419,2.137578,57.67694
29/07/2021 11:00:00.8,0.2435,-0.51875,0.1525,25.98849,668.4075,16.23631,98.65817,27.66419,2.136035,57.6353
29/07/2021 11:00:00.9,0.22075,-0.61475,0.033,25.80215,674.759,15.92974,98.68423,27.62256,2.132384,57.67694
29/07/2021 11:00:01.0,0.28025,-0.7262501,-0.053,25.84531,677.9393,15.76822,98.67521,27.65587,2.137896,57.71358
29/07/2021 11:00:01.1,0.27025,-0.7032501,-0.13675,25.83667,679.4952,15.69851,98.65817,27.64421,2.136132,57.70525
29/07/2021 11:00:01.2,0.232,-0.694,-0.05075,25.83496,679.4978,15.70536,98.65817,27.70084,2.145746,57.77353
29/07/2021 11:00:01.3,0.165,-0.6102501,-0.0565,25.86084,679.2551,15.71204,98.67521,27.68085,2.14386,57.79018
29/07/2021 11:00:01.4,0.23975,-0.636,-0.0815,25.82632,679.4276,15.70096,98.64916,27.71249,2.147887,57.79185
29/07/2021 11:00:01.5,0.27175,-0.591,-0.021,25.82806,679.7066,15.69168,98.65817,27.63921,2.137542,57.76021
29/07/2021 11:00:01.6,0.188,-0.60025,-0.04875,25.84009,678.5519,15.75871,98.64916,27.70416,2.145296,57.75022
29/07/2021 11:00:01.7,0.14625,-0.566,0.001,25.80905,680.5561,15.66352,98.67521,27.64587,2.139792,57.79851
29/07/2021 11:00:01.8,0.1705,-0.67675,0.01,25.79874,680.9865,15.64016,98.65817,27.66253,2.141258,57.78186
29/07/2021 11:00:01.9,0.162,-0.6135001,-0.08125,25.7901,681.2719,15.61947,98.69327,27.67252,2.142385,57.77853
Dates are parsed in 3 steps
import pandas as pd
from datetime import datetime, timezone
import time
def date_to_unixts(col):
if len(col.loc[0]) > 7:
base_time = datetime.strptime(col.loc[0], dt_fmt0)
else:
base_time = datetime.strptime(f'01/01/1970 00:{col.loc[0]}', dt_fmt)
col.loc[0] = base_time.replace(tzinfo = timezone.utc).timestamp()
return col
def sum_if(x, y):
# return first
m = pd.Series([y])
# add first to rows 1 to 9
return pd.concat([m, x.iloc[1:].apply(lambda t: t + y)])
pd.options.display.float_format = '{:.2f}'.format
dt_fmt0 = "%d/%m/%Y %H:%M"
dt_fmt = "%d/%m/%Y %H:%M:%S.%f"
data = pd.read_csv('/home/lmc/tmp/temp.csv', skiprows=1, header=None)
# Convert date/time to unix timestamp as is
df = data.apply(date_to_unixts, axis=1)
# add first full date to short dates every 10 records
df['ts2'] = df.groupby(df.index//10)[0].apply(lambda x: sum_if(x, x.iloc[0])).reset_index()[0]
# convert timestamp back to date
df['dt2'] = df['ts2'].apply(lambda y: datetime.utcfromtimestamp(y).strftime(dt_fmt)[:-5])
print(df[[1,2,3,'dt2']])
Result showing a few columns plus parsed datetime
1 2 3 dt2
0 0.24 -0.30 0.26 29/07/2021 11:00:00.0
1 0.35 -0.34 0.21 29/07/2021 11:00:00.1
2 0.38 -0.28 0.20 29/07/2021 11:00:00.2
3 0.35 -0.22 0.20 29/07/2021 11:00:00.3
4 0.09 -0.33 0.23 29/07/2021 11:00:00.4
5 0.01 -0.49 0.27 29/07/2021 11:00:00.5
6 0.10 -0.43 0.22 29/07/2021 11:00:00.6
7 0.06 -0.39 0.01 29/07/2021 11:00:00.7
8 0.24 -0.52 0.15 29/07/2021 11:00:00.8
9 0.22 -0.61 0.03 29/07/2021 11:00:00.9
10 0.28 -0.73 -0.05 30/08/2021 17:00:00.0
11 0.27 -0.70 -0.14 30/08/2021 17:00:01.1
12 0.23 -0.69 -0.05 30/08/2021 17:00:01.2
13 0.17 -0.61 -0.06 30/08/2021 17:00:01.3
14 0.24 -0.64 -0.08 30/08/2021 17:00:01.4
15 0.27 -0.59 -0.02 30/08/2021 17:00:01.5
16 0.19 -0.60 -0.05 30/08/2021 17:00:01.6
17 0.15 -0.57 0.00 30/08/2021 17:00:01.7
18 0.17 -0.68 0.01 30/08/2021 17:00:01.8
19 0.16 -0.61 -0.08 30/08/2021 17:00:01.9
Note: Second full date was changed to better identify groups of records.