pythonpandasmultiple-conditionsfillnaffill

Python Pandas ffill or bffill with multiple condition


I have a dataset like below :

condition=[None,None,None,'condtion1',None,None,None,'conditon2',None,None,None]
add_val = [None,None,None,10,None,None,None,20,None,None,None]
df=pd.DataFrame({'event_condition':condition,'add_col':add_val})

enter image description here

I want to fillna of add_col in terms of condition.

When I meet condition 1 of event_condition, I want to ffill with value of 10 in a separate column called 'on_condition1'. It should not be changed for condition2. When I meet condition 2 of event_conditon, I want to ffill with value of 20 in a separate column called 'on_condition2' It should not be changed for condition1. Before meeting these conditions, I want to back fill with 0.

I tried groupby method but it did not work :

 df.groupby(df['event_condition'].eq(cond).cumsum())['add_col'].ffill()

enter image description here

It will be like this :

condition=[None,None,None,'condtion1',None,None,None,'conditon2',None,None,None]
add_val = [None,None,None,10,None,None,None,20,None,None,None]
on_cond1=[0,0,0,None,10,10,10,None,10,10,10]
on_cond2=[0,0,0,None,0,0,0 ,None,20,20,20]
df=pd.DataFrame({'event_condition':condition,'add_col':add_val,'on_condition1':on_cond1,'on_condition2':on_cond2})

enter image description here


Solution

  • Use:

    #create Series with non NaNs rows by event_condition
    s = df.dropna(subset=['event_condition']).set_index('event_condition')['add_col']
    
    #for each value add new column
    for k, v in s.items():
        #new column name
        col = f'on_{k}'
        #test all rows after match condition
        m = df['event_condition'].eq(k).cummax()
        #test rows for non NaNs
        m1 = df['event_condition'].notna()
        #set new column by 2 masks
        df[col] = np.select([m1, m], [np.nan,v], 0)
    
    print (df)
       event_condition  add_col  on_condtion1  on_conditon2
    0             None      NaN           0.0           0.0
    1             None      NaN           0.0           0.0
    2             None      NaN           0.0           0.0
    3        condtion1     10.0           NaN           NaN
    4             None      NaN          10.0           0.0
    5             None      NaN          10.0           0.0
    6             None      NaN          10.0           0.0
    7        conditon2     20.0           NaN           NaN
    8             None      NaN          10.0          20.0
    9             None      NaN          10.0          20.0
    10            None      NaN          10.0          20.0
    

    Another idea with forward fillig:

    s = df.dropna(subset=['event_condition']).set_index('event_condition')['add_col']
    for k, v in s.items():
        col = f'on_{k}'
        m = df['event_condition'].eq(k)
        m1 = df['event_condition'].notna()
        df[col] = np.select([m1, m.cummax()], 
                            [np.nan, df['add_col'].where(m).ffill()], 0)
    
    print (df)
       event_condition  add_col  on_condtion1  on_conditon2
    0             None      NaN           0.0           0.0
    1             None      NaN           0.0           0.0
    2             None      NaN           0.0           0.0
    3        condtion1     10.0           NaN           NaN
    4             None      NaN          10.0           0.0
    5             None      NaN          10.0           0.0
    6             None      NaN          10.0           0.0
    7        conditon2     20.0           NaN           NaN
    8             None      NaN          10.0          20.0
    9             None      NaN          10.0          20.0
    10            None      NaN          10.0          20.0
    

    With baxk fillig, is necsary change order for cumulative max:

    s = df.dropna(subset=['event_condition']).set_index('event_condition')['add_col']
    for k, v in s.items():
        col = f'on_{k}'
        m = df['event_condition'].eq(k)
        m1 = df['event_condition'].notna()
        df[col] = np.select([m1, m.iloc[::-1].cummax().iloc[::-1]], 
                            [np.nan, df['add_col'].where(m).bfill()], 0)
    
    print (df)
       event_condition  add_col  on_condtion1  on_conditon2
    0             None      NaN          10.0          20.0
    1             None      NaN          10.0          20.0
    2             None      NaN          10.0          20.0
    3        condtion1     10.0           NaN           NaN
    4             None      NaN           0.0          20.0
    5             None      NaN           0.0          20.0
    6             None      NaN           0.0          20.0
    7        conditon2     20.0           NaN           NaN
    8             None      NaN           0.0           0.0
    9             None      NaN           0.0           0.0
    10            None      NaN           0.0           0.0
    

    EDIT: If combine forward and back filling together:

    condition=[None,None,None,'condtion1',None,None,None,'conditon2',None,None,None]
    add_val = [None,None,None,10,None,None,None,20,None,None,None]
    add_val1 = [None,None,None,70,None,None,None,50,None,None,None]
    df=pd.DataFrame({'event_condition':condition,'add_col':add_val, 'add_col2':add_val1})
    print (df)
       event_condition  add_col  add_col2
    0             None      NaN       NaN
    1             None      NaN       NaN
    2             None      NaN       NaN
    3        condtion1     10.0      70.0
    4             None      NaN       NaN
    5             None      NaN       NaN
    6             None      NaN       NaN
    7        conditon2     20.0      50.0
    8             None      NaN       NaN
    9             None      NaN       NaN
    10            None      NaN       NaN
    

    df1 = df.dropna(subset=['event_condition']).set_index('event_condition')[['add_col','add_col2']]
    print (df1)
                     add_col  add_col2
    event_condition                   
    condtion1           10.0      70.0
    conditon2           20.0      50.0
    

    for k, v in df1['add_col2'].items():
        col = f'on_{k}'
        m = df['event_condition'].eq(k)
    
        m1 = df['event_condition'].notna()
        df[col] = np.select([m1, m.iloc[::-1].cummax().iloc[::-1]], 
                            [np.nan, df['add_col2'].where(m).bfill()], 
                            df['add_col'].where(m).ffill())
    
    print (df)
       event_condition  add_col  add_col2  on_condtion1  on_conditon2
    0             None      NaN       NaN          70.0          50.0
    1             None      NaN       NaN          70.0          50.0
    2             None      NaN       NaN          70.0          50.0
    3        condtion1     10.0      70.0           NaN           NaN
    4             None      NaN       NaN          10.0          50.0
    5             None      NaN       NaN          10.0          50.0
    6             None      NaN       NaN          10.0          50.0
    7        conditon2     20.0      50.0           NaN           NaN
    8             None      NaN       NaN          10.0          20.0
    9             None      NaN       NaN          10.0          20.0
    10            None      NaN       NaN          10.0          20.0