Job progress not showing in Hadoop TaskTracker web interface
I am a Hadoop novice, so kindly pardon my ignorance.

I am running the following Hadoop program in fully distributed mode to count
the number of lines in a file. I am running this job from Eclipse and I see
it running (based on the output to the Eclipse console), but I do not see the
tasks in the TaskTracker web interface. Also, even though the data is
distributed across multiple hosts, it doesn't seem to be distributing work
across hosts.

Could someone please help me with this?


package LineCount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LineCount extends Configured implements Tool {

    // Emits ("Number Of Lines", 1) once per input line.
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static int counter = 1;
        private static Text mapOpKey = new Text();
        private final static IntWritable mapOpValue = new IntWritable(1);

        @Override
        public void map(LongWritable mapInpKey, Text mapInpValue, Context context)
                throws IOException, InterruptedException {
            System.out.println("Calling Map " + counter);
            counter++;
            mapOpKey.set("Number Of Lines");
            context.write(mapOpKey, mapOpValue);
        }
    }

    // Sums the 1s emitted by the mappers to produce the total line count.
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private static int counter = 1;

        @Override
        public void reduce(Text redIpKey, Iterable<IntWritable> redIpValue, Context context)
                throws IOException, InterruptedException {
            System.out.println("Calling Reduce " + counter);
            counter++;
            int sum = 0;
            for (IntWritable value : redIpValue) {
                sum += value.get();
            }
            context.write(redIpKey, new IntWritable(sum));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/hadoop-0.20.2/conf/core-site.xml"));
        Job job = new Job(conf);
        job.setJobName("LineCount");
        job.setJarByClass(LineCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        //job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        FileInputFormat.setInputPaths(job, new Path("/usr/foo/hadoopIP"));
        FileOutputFormat.setOutputPath(job, new Path("/usr/foo/hadoopOP"));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new LineCount(), args));
    }
}




  • Teng, James at Jul 20, 2011 at 5:12 am
    You can't run a Hadoop job from Eclipse; you have to set up an environment on a Linux system. Maybe you can try installing it on a VMware Linux system and running the job in pseudo-distributed mode.


    James, Teng (Teng Linxiao)
    eRL, CDC, eBay, Shanghai
    Extension: 86-21-28913530
    MSN: tenglinxiao@hotmail.com
    Skype: James,Teng
    Email: xteng@ebay.com

  • Steve Loughran at Jul 21, 2011 at 11:45 am

    On 20/07/11 06:11, Teng, James wrote:

        You can't run a Hadoop job from Eclipse; you have to set up an
        environment on a Linux system. Maybe you can try installing it on a
        VMware Linux system and running the job in pseudo-distributed mode.

    Actually, you can bring up a MiniMRCluster in your JUnit test run (if
    hadoop-core-test is on the classpath) and run simple jobs against that.
    This is the standard way that Hadoop tests itself. It's not that
    high-performing, doesn't scale out, and can leak threads, but it's ideal
    for basic testing.
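
    For reference, here is a minimal sketch of that approach, assuming Hadoop
    0.20.x with hadoop-core-test on the classpath (the class name and the
    cluster sizes are illustrative):

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.hdfs.MiniDFSCluster;
        import org.apache.hadoop.mapred.JobConf;
        import org.apache.hadoop.mapred.MiniMRCluster;

        public class MiniClusterSketch {
            public static void main(String[] args) throws Exception {
                Configuration conf = new Configuration();
                // In-JVM HDFS with two datanodes (from hadoop-core-test).
                MiniDFSCluster dfs = new MiniDFSCluster(conf, 2, true, null);
                FileSystem fs = dfs.getFileSystem();
                // In-JVM MapReduce cluster with two tasktrackers, backed by
                // the mini HDFS above.
                MiniMRCluster mr = new MiniMRCluster(2, fs.getUri().toString(), 1);
                try {
                    // Jobs built from this JobConf go to the mini cluster's
                    // JobTracker instead of the local runner.
                    JobConf jobConf = mr.createJobConf();
                    // ... configure and submit a test job against jobConf ...
                } finally {
                    mr.shutdown();
                    dfs.shutdown();
                }
            }
        }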
  • Harsh J at Jul 20, 2011 at 5:50 am
    Looks like it may be running in local mode. Have you set up your
    Eclipse configuration properly?

    What version of Hadoop are you using?
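
    For reference: if the client's Configuration carries no JobTracker
    address, Hadoop falls back to the LocalJobRunner and the whole job runs
    inside the submitting JVM, which would match these symptoms. A minimal
    sketch of pointing the client at the cluster (the host:port values are
    illustrative; on 0.20.x the relevant keys are fs.default.name and
    mapred.job.tracker):

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.Path;
        import org.apache.hadoop.mapreduce.Job;

        Configuration conf = new Configuration();
        // Point the client at the cluster; the default mapred.job.tracker
        // value of "local" selects the LocalJobRunner.
        conf.set("fs.default.name", "hdfs://namenode-host:9000");
        conf.set("mapred.job.tracker", "jobtracker-host:9001");
        // Equivalently, add the cluster's own config files as resources:
        //   conf.addResource(new Path("/hadoop-0.20.2/conf/core-site.xml"));
        //   conf.addResource(new Path("/hadoop-0.20.2/conf/mapred-site.xml"));
        // When submitting from an IDE, the job classes also need to be
        // shipped as a jar, e.g.:
        //   conf.set("mapred.jar", "/path/to/linecount.jar");
        Job job = new Job(conf);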


    --
    Harsh J
  • Foo_foo_foo at Jul 21, 2011 at 1:02 am
    Found the issue. I created a JAR and submitted it through bin/hadoop, and
    it distributes the tasks as expected.
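
    For reference, the submission would look something like this (the jar
    name and the bin/ classes directory are illustrative; the main class is
    LineCount.LineCount from the code above):

        jar cf linecount.jar -C bin .
        bin/hadoop jar linecount.jar LineCount.LineCount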
  • Harsh J at Jul 21, 2011 at 2:31 am
    Although that solves it, it's not a solution to the Eclipse configuration
    issue, I think. But it's good to know you've found a way that works for
    you!


    --
    Harsh J
