Hi,

I rewrote the WordCount sample to use the new Hadoop API, but my reduce task doesn't seem to launch.

The result file always looks like:

some_word 1
some_word 1
another_word 1
another_word 1

...

Here is the code:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class WordCountMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer st = new StringTokenizer(value.toString());
            while (st.hasMoreTokens()) {
                context.write(new Text(st.nextToken()), new IntWritable(1));
            }
        }
    }

    public static class WordCountReduce
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        @SuppressWarnings("unchecked")
        public void reduce(Text key, Iterable<IntWritable> values,
                Reducer.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Job job = new Job();
        job.setJobName("WordCounter");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

It looks like WordCountReduce is never launched, but I don't see any warnings or errors in the log file.

Any help is highly appreciated.

Thanks in advance,
Vitaliy S


  • Ken Goodhope at Jul 4, 2010 at 7:45 pm
    You need @Override on your reduce method. Right now your method does not
    actually override Reducer.reduce, so you are getting the default
    identity reduce method.
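
    For reference, the default reduce() in org.apache.hadoop.mapreduce.Reducer
    is roughly the following identity pass-through, which is exactly what
    produces the repeated "some_word 1" lines:

    // Sketch of Reducer's default reduce(): every (key, value) pair is
    // written through unchanged, one output line per input occurrence.
    protected void reduce(KEYIN key, Iterable<VALUEIN> values, Context context)
            throws IOException, InterruptedException {
        for (VALUEIN value : values) {
            context.write((KEYOUT) key, (VALUEOUT) value);
        }
    }

    With @Override present, a method that does not actually override, such as
    one taking the raw Reducer.Context, fails to compile ("method does not
    override or implement a method from a supertype") instead of silently
    falling back to this default.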
  • Vitaliy Semochkin at Jul 5, 2010 at 8:04 am
    Thank you very much, Ken.

    The problem was a missing generics declaration (Eclipse failed to
    generate the overriding method correctly and I didn't notice the
    mistake).

    Instead of:

    public void reduce(Text key, Iterable<IntWritable> values,
            Reducer.Context context) throws IOException, InterruptedException

    it should be:

    public void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException
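
    As an aside, since the nested Context type is inherited with its type
    parameters already bound, the shorter form used in the mapper would also
    have overridden correctly:

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }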

    The complete working sample looks like:

    package org.prototype4u.mapreduce;

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCount {

        public static class WordCountMapper
                extends Mapper<LongWritable, Text, Text, IntWritable> {

            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer st = new StringTokenizer(value.toString());
                while (st.hasMoreTokens()) {
                    context.write(new Text(st.nextToken()), new IntWritable(1));
                }
            }
        }

        public static class WordCountReduce
                extends Reducer<Text, IntWritable, Text, IntWritable> {

            @Override
            public void reduce(Text key, Iterable<IntWritable> values,
                    Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                }
                context.write(key, new IntWritable(sum));
            }
        }

        public static void main(String[] args)
                throws IOException, InterruptedException, ClassNotFoundException {
            Job job = new Job();
            job.setJobName("WordCounter");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
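
    As a further note, two conventional refinements (not needed for
    correctness): Hadoop code usually reuses Writable instances instead of
    allocating new objects for every token, and for word count the reducer
    can also serve as a combiner. A sketch of the mapper written that way:

    public static class WordCountMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused across calls; the framework copies/serializes the pair
        // during write(), so mutating them on the next token is safe.
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer st = new StringTokenizer(value.toString());
            while (st.hasMoreTokens()) {
                word.set(st.nextToken());
                context.write(word, ONE);
            }
        }
    }

    Adding job.setCombinerClass(WordCountReduce.class); in main() lets the
    map side pre-aggregate the counts. Packaged into a jar (wordcount.jar is
    just a placeholder name), the job runs as:

    hadoop jar wordcount.jar org.prototype4u.mapreduce.WordCount <input> <output>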

    Thank you for the hint.

    Regards,
    Vitaliy S