When I run the job, it throws the following error:
11/07/29 22:22:22 INFO mapred.JobClient: Task Id : attempt_201107292131_0011_m_000000_2, Status : FAILED
java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.IntWritable, recieved org.apache.hadoop.io.Text
But I have already set IntWritable in two places:
1: Reducer<Text,Text,Text,IntWritable>
2: job.setOutputValueClass(IntWritable.class);
So where am I wrong?
public class MyTest {
public static class TokenizerMapper
extends Mapper<Text, Text, Text, Text>{
public void map(Text key, Text value, Context context
) throws IOException, InterruptedException {
context.write(key, value);
}
}
public static class IntSumReducer
extends Reducer<Text,Text,Text,IntWritable> {
public void reduce(Text key, Iterable<Text> values,
Context context
) throws IOException, InterruptedException {
int count = 0;
for (Text iw:values) {
count++;
}
context.write(key, new IntWritable(count));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
// the configure of seprator should be done in conf
conf.set("key.value.separator.in.input.line", ",");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
// job.setReducerClass(IntSumReducer.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
// job.set("key.value.separator.in.input.line", ",");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}