FAQ
I am working through the WordCount example to get rid of all the deprecation
warnings. While running it, my reduce function isn't being called. Any
ideas? The code below can also be found here: http://gist.github.com/346975

Thanks!
Chris

package hadoop.examples;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

public class WordCount extends Configured implements Tool {

public static class Map extends Mapper<LongWritable, Text, Text,
IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();

public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}

public static class Reduce extends Reducer<Text, IntWritable, Text,
IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, Context
context)
throws IOException, InterruptedException {
int sum = 0;
while (values.hasNext()) {
sum += values.next().get();
}
context.write(key, new IntWritable(sum));
}
}

public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new WordCount(), args);
System.exit(res);
}

@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf, "wordcount");

job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
//job.setCombinerClass(Reduce.class);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

return job.waitForCompletion(true) ? 0 : 1;
}
}

--
View this message in context: http://n3.nabble.com/Error-converting-WordCount-to-v0-20-x-tp682061p682061.html
Sent from the Users mailing list archive at Nabble.com.

Search Discussions

  • Slim tebourbi at Apr 1, 2010 at 9:48 pm
    I tried the same thing and noticed that even the map function was
    not executed!

    here are the logs :

    $ hadoop jar wordcount.jar org.stebourbi.hadoop.training.WordCount input
    output

    10/04/01 23:39:53 INFO security.Groups: Group mapping
    impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping;
    cacheTimeout=300000
    10/04/01 23:39:53 WARN conf.Configuration: mapred.task.id is deprecated.
    Instead, use mapreduce.task.attempt.id

    10/04/01 23:39:53 DEBUG mapreduce.JobSubmitter: Configuring job
    job_201004012334_0007 with
    hdfs://localhost:9000/tmp/hadoop-tebourbi/mapred/staging/tebourbi/.staging/job_201004012334_0007
    as the submit dir
    10/04/01 23:39:53 WARN mapreduce.JobSubmitter: Use GenericOptionsParser for
    parsing the arguments. Applications should implement Tool for the same.
    10/04/01 23:39:53 DEBUG mapreduce.JobSubmitter: default FileSystem:
    hdfs://localhost:9000
    10/04/01 23:39:54 DEBUG mapreduce.JobSubmitter: Creating splits at
    hdfs://localhost:9000/tmp/hadoop-tebourbi/mapred/staging/tebourbi/.staging/job_201004012334_0007
    10/04/01 23:39:54 INFO input.FileInputFormat: Total input paths to process :
    3
    10/04/01 23:39:54 DEBUG input.FileInputFormat: Total # of splits: 3
    10/04/01 23:39:54 WARN conf.Configuration: mapred.map.tasks is deprecated.
    Instead, use mapreduce.job.maps
    10/04/01 23:39:54 INFO mapreduce.JobSubmitter: number of splits:3
    10/04/01 23:39:54 INFO mapreduce.JobSubmitter: adding the following
    namenodes' delegation tokens:null
    10/04/01 23:39:54 INFO mapreduce.Job: Running job: job_201004012334_0007
    10/04/01 23:39:55 INFO mapreduce.Job: map 0% reduce 0%
    10/04/01 23:39:55 INFO mapreduce.Job: Job complete: job_201004012334_0007
    10/04/01 23:39:55 INFO mapreduce.Job: Counters: 4
    Job Counters
    Total time spent by all maps waiting after reserving slots (ms)=0
    Total time spent by all reduces waiting after reserving slots (ms)=0
    SLOTS_MILLIS_MAPS=0
    SLOTS_MILLIS_REDUCES=0


    However, the same code works well on eclipse as a simple java program!

    Slim.

    2010/3/28 Chris Williams <chris.d.williams@gmail.com>
    I am working through the WordCount example to get rid of all the
    deprecation
    warnings. While running it, my reduce function isn't being called. Any
    ideas? The code below can also be found here:
    http://gist.github.com/346975

    Thanks!
    Chris

    package hadoop.examples;

    import java.io.IOException;
    import java.util.*;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;

    public class WordCount extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, Text, Text,
    IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value, Context
    context)
    throws IOException, InterruptedException {
    String line = value.toString();
    StringTokenizer tokenizer = new
    StringTokenizer(line);
    while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    context.write(word, one);
    }
    }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text,
    IntWritable> {
    public void reduce(Text key, Iterator<IntWritable> values,
    Context
    context)
    throws IOException, InterruptedException {
    int sum = 0;
    while (values.hasNext()) {
    sum += values.next().get();
    }
    context.write(key, new IntWritable(sum));
    }
    }

    public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new
    WordCount(), args);
    System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "wordcount");

    job.setJarByClass(WordCount.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    //job.setCombinerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
    }
    }

    --
    View this message in context:
    http://n3.nabble.com/Error-converting-WordCount-to-v0-20-x-tp682061p682061.html
    Sent from the Users mailing list archive at Nabble.com.

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommon-user @
categorieshadoop
postedMar 28, '10 at 7:52p
activeApr 1, '10 at 9:48p
posts2
users2
websitehadoop.apache.org...
irc#hadoop

2 users in discussion

Slim tebourbi: 1 post Chris Williams: 1 post

People

Translate

site design / logo © 2021 Grokbase