impalahue

How to convert string to date and use MIN and MAX in Impala?


I'm trying to get the highest date and lowest date in the column, from there get the data that is in the filesize column of the rows that have these respective dates and add it to another column. I made the following code but it doesn't work.

CASE WHEN MIN(to_timestamp(extract_dt, 'yyyy-MM-dd')) THEN filesize ELSE 0 END filesize_extracao_passada,

Solution

  • You can use a SQL like below.

    select 
    filesize,dt,
    if ( mx.mxdt = to_timestamp(extract_dt, 'yyyy-MM-dd'), filesize,null) mx_dt, -- this compares and display filesize for max dt
    if ( mn.mndt = to_timestamp(extract_dt, 'yyyy-MM-dd'), filesize,null) mn_dt
    from mytable
    left join (select max(to_timestamp(extract_dt, 'yyyy-MM-dd')) mxdt from tmp)mx on 1=1 --This calculates maximum date
    left join (select min(to_timestamp(extract_dt, 'yyyy-MM-dd')) mndt from tmp)mn on 1=1 --This calculates minimum date
    WHERE 
    mn.mndt = to_timestamp(extract_dt, 'yyyy-MM-dd') OR mx.mxdt = to_timestamp(extract_dt, 'yyyy-MM-dd')  -- this ensure your select clause returns only rows having maximum or minimum dates
    

    Here is my code if you want to try out in your environment.

    create table tmp  as 
    select 'a' as id, now() dt union all
    select 'a' as id, now()+ interval 1 days dt union all
    select 'a' as id, now() - interval 5 days dt union all
    select 'a' as id, now()+ interval 3 days dt union all
    select 'a' as id, now()+ interval 3 days dt union all
    select 'a' as id, now()+ interval 1 days dt union all
    select 'a' as id, now()+ interval 2 days dt ;
    
    select 
    id,dt,
    if ( mx.mxdt = dt, id,null) mx_dt,
    if ( mn.mndt = dt, id,null) mn_dt
    from tmp
    left join (select max(dt) mxdt from tmp)mx on 1=1
    left join (select min(dt) mndt from tmp)mn on 1=1
    WHERE mx.mxdt = dt or mn.mndt = dt
    

    enter image description here