I'm running a Hadoop MapReduce program to calculate the average, maximum and minimum temperature. The temperatures are stored in input1.csv, which has three columns: a date in YYYY-MM-DD format, a numeric temperature, and location data as text. When I try to execute it, I get the error: Error: java.io.IOException: wrong value class: class org.apache.hadoop.io.Text is not class org.apache.hadoop.io.FloatWritable.
The following is the code I have written. Please help me resolve this error.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class TemperatureStats {
public static class TemperatureMapper extends Mapper<Object, Text, Text, FloatWritable> {
private Text year = new Text();
private FloatWritable temperature = new FloatWritable();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] tokens = value.toString().split(",");
String date = tokens[0];
String[] dateParts = date.split("-");
String yearStr = dateParts[0];
year.set(yearStr);
float temp = Float.parseFloat(tokens[1]);
temperature.set(temp);
context.write(year, temperature);
}
}
public static class TemperatureReducer extends Reducer<Text, FloatWritable, Text, Text> {
public void reduce(Text key, Iterable<FloatWritable> values, Context context)
throws IOException, InterruptedException {
float sum = 0;
int count = 0;
float minTemp = Float.MAX_VALUE;
float maxTemp = Float.MIN_VALUE;
// Calculate sum, count, min, and max temperature for each year
for (FloatWritable val : values) {
float temp = val.get();
sum += temp;
count++;
minTemp = Math.min(minTemp, temp);
maxTemp = Math.max(maxTemp, temp);
}
// Calculate average temperature
float averageTemp = sum / count;
// Emit year and stats
context.write(key, new Text("Average: " + averageTemp + ", Min: " + minTemp + ", Max: " + maxTemp));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "temperature stats");
job.setJarByClass(TemperatureStats.class);
job.setMapperClass(TemperatureMapper.class);
job.setCombinerClass(TemperatureReducer.class);
job.setReducerClass(TemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextInputFormat.addInputPath(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Problem cause: The call job.setCombinerClass(TemperatureReducer.class) is the culprit. A combiner runs on the map side and its output must have exactly the map output types — here (Text, FloatWritable) — because its output is fed back into the reducer as input. TemperatureReducer, however, is declared as Reducer<Text, FloatWritable, Text, Text>: it emits Text values. When the framework checks the combiner's output value against the expected map output value class, it fails with "java.io.IOException: wrong value class: class org.apache.hadoop.io.Text is not class org.apache.hadoop.io.FloatWritable".
Solution: Remove job.setCombinerClass(TemperatureReducer.class). In addition, declare the intermediate types explicitly with job.setMapOutputKeyClass(Text.class) and job.setMapOutputValueClass(FloatWritable.class), and change job.setOutputValueClass(FloatWritable.class) to job.setOutputValueClass(Text.class) so it matches what the reducer actually writes.