I am running the following Hadoop program in fully distributed mode to count
the number of lines in a file. I am running this job from Eclipse, and I can see
it running (based on the output in the Eclipse console), but I do not see the
tasks in the TaskTracker web interface. Also, even though the data is
distributed across multiple hosts, the job does not seem to be distributing work
across hosts.
Could someone please help me with this?
package LineCount;
import java.util.*;
import java.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.util.*;
/**
 * MapReduce job that counts the number of lines in the input files.
 *
 * <p>Each mapper emits the pair ("Number Of Lines", 1) for every input record and
 * the reducer sums those ones, so the output holds a single key whose value is
 * the total line count.
 */
public class LineCount extends Configured implements Tool {

    /** Emits ("Number Of Lines", 1) once for every input line. */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        /** Number of map() calls made in this JVM; used only for the debug trace. */
        private static int counter = 1;
        /** Constant output key, initialised once instead of being reset per record. */
        private static final Text mapOpKey = new Text("Number Of Lines");
        private static final IntWritable mapOpValue = new IntWritable(1);

        /**
         * Emits one count for the given line; the line's offset and content are ignored.
         *
         * @param mapInpKey   key of the input record (unused)
         * @param mapInpValue value of the input record (unused)
         * @param context     task context that receives the output pair
         * @throws IOException          if the output pair cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable mapInpKey, Text mapInpValue, Context context)
                throws IOException, InterruptedException {
            System.out.println("Calling Map " + counter);
            counter++;
            context.write(mapOpKey, mapOpValue);
        }
    }

    /** Sums all counts received for a key and writes the total. */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        /** Number of reduce() calls made in this JVM; used only for the debug trace. */
        private static int counter = 1;

        /**
         * Adds up every value for {@code redIpKey} and emits (key, sum).
         *
         * @param redIpKey   key shared by all values in this group
         * @param redIpValue counts emitted for the key
         * @param context    task context that receives the output pair
         * @throws IOException          if the output pair cannot be written
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text redIpKey, Iterable<IntWritable> redIpValue, Context context)
                throws IOException, InterruptedException {
            System.out.println("Calling Reduce " + counter);
            counter++;
            int sum = 0;
            // Obtain the iterator once via for-each. The previous loop called
            // iterator() on every turn, which only terminates when the Iterable
            // happens to hand back the same iterator instance each time.
            for (IntWritable value : redIpValue) {
                sum += value.get();
            }
            context.write(redIpKey, new IntWritable(sum));
        }
    }

    /**
     * Configures the job, submits it and waits for it to finish.
     *
     * @param args command-line arguments (currently unused; paths are fixed)
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if the job cannot be set up or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        // Reuse the configuration injected through setConf() so that options
        // handled by ToolRunner are not thrown away; fall back to a fresh one
        // when run() is invoked directly.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }
        conf.addResource(new Path("/hadoop-0.20.2/conf/core-site.xml"));
        // core-site.xml alone does not carry the JobTracker address. Without
        // mapred-site.xml the job is presumably run by the in-process local
        // runner, which would explain why it never shows up in the cluster web
        // UI. NOTE(review): assumes mapred-site.xml sits next to core-site.xml
        // on the submitting machine - confirm.
        conf.addResource(new Path("/hadoop-0.20.2/conf/mapred-site.xml"));

        Job job = new Job(conf);
        job.setJobName("LineCount");
        job.setJarByClass(LineCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        //job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        FileInputFormat.setInputPaths(job, new Path("/usr/foo/hadoopIP"));
        FileOutputFormat.setOutputPath(job, new Path("/usr/foo/hadoopOP"));

        // Report failure to the caller instead of always returning success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; the process exit code mirrors the job result.
     *
     * @param args command-line arguments forwarded to ToolRunner
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new LineCount(), args));
    }
}
--
View this message in context: http://old.nabble.com/Job-progress-not-showing-in-Hadoop-Tasktracker--web-interface-tp32096156p32096156.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.