I have two timestamps as input. I want to calculate the time difference in hours between those timestamps excluding Sundays.
I can get the number of days using datediff function in hive.
I can get the day of a particular date using from_unixtime(unix_timestamp(startdate), 'EEEE').
But I dont know how to relate those functions to achieve my requirement or is there any other easy way to achieve this.
Thanks in Advance.
You can write one custom UDF which takes two columns containing the dates as inputs and counts the difference between the dates excluding sundays.
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Date;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
public class IsoYearWeek extends UDF {
public LongWritable evaluate(Text dateString,Text dateString1) throws ParseException { //takes the two columns as inputs
SimpleDateFormat date = new SimpleDateFormat("dd/MM/yyyy");
/* String date1 = "20/07/2016";
String date2 = "28/07/2016";
*/ int count=0;
List<Date> dates = new ArrayList<Date>();
Date startDate = (Date)date.parse(dateString.toString());
Date endDate = (Date)date.parse(dateString1.toString());
long interval = 24*1000 * 60 * 60; // 1 hour in millis
long endTime =endDate.getTime() ; // create your endtime here, possibly using Calendar or Date
long curTime = startDate.getTime();
while (curTime <= endTime) {
dates.add(new Date(curTime));
curTime += interval;
}
for(int i=0;i<dates.size();i++){
Date lDate =(Date)dates.get(i);
if(lDate.getDay()==0){
count+=1; //counts the number of sundays in between
}
}
long days_diff = (endDate.getTime()-startDate.getTime())/(24 * 60 * 60 * 1000)-count; //displays the days difference excluding sundays
return new LongWritable(days_diff);
}
}