Author: hashutosh
Date: Wed Mar 12 22:54:18 2014
New Revision: 1576978

URL: http://svn.apache.org/r1576978
Log:
HIVE-6572 : Use shimmed version of hadoop conf names for mapred.{min,max}.split.size{.*} (Sushanth Sowmyan via Ashutosh Chauhan)

Modified:
     hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
     hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
     hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
     hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
     hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
     hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
     hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
     hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
     hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
     hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
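
Every hunk below follows the same pattern: a hardcoded Hadoop 1.x property name such as "mapred.min.split.size" is replaced by a lookup through the shim layer, so the same code resolves to the correct key on either Hadoop line. A minimal sketch of the pattern (the byte values are illustrative; they happen to match OrcInputFormat's defaults further down):

    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.shims.ShimLoader;

    public class ShimmedSplitSizeSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The shim map returns "mapred.min.split.size" on the 0.20/0.20S
        // shims and "mapreduce.input.fileinputformat.split.minsize" on 0.23.
        Map<String, String> names = ShimLoader.getHadoopShims().getHadoopConfNames();
        conf.setLong(names.get("MAPREDMINSPLITSIZE"), 16L * 1024 * 1024);
        conf.setLong(names.get("MAPREDMAXSPLITSIZE"), 256L * 1024 * 1024);
        System.out.println("resolved min-split key: " + names.get("MAPREDMINSPLITSIZE"));
      }
    }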

Modified: hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java (original)
+++ hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java Wed Mar 12 22:54:18 2014
@@ -88,7 +88,9 @@ public final class HCatConstants {
     * The desired number of input splits produced for each partition. When the
     * input files are large and few, we want to split them into many splits,
    * so as to increase the parallelism of loading the splits. Try also two
-   * other parameters, mapred.min.split.size and mapred.max.split.size, to
+   * other parameters, mapred.min.split.size and mapred.max.split.size for
+   * Hadoop 1.x, or mapreduce.input.fileinputformat.split.minsize and
+   * mapreduce.input.fileinputformat.split.maxsize in Hadoop 2.x, to
     * control the number of input splits.
     */
    public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
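
The tuning this javadoc describes rests on the standard FileInputFormat computation, splitSize = max(minSize, min(maxSize, blockSize)). Assuming that formula, a quick worked sketch of how the two knobs move the split count:

    public class SplitSizeMath {
      // Standard FileInputFormat-style computation (assumed here).
      static long splitSize(long blockSize, long minSize, long maxSize) {
        return Math.max(minSize, Math.min(maxSize, blockSize));
      }

      public static void main(String[] args) {
        long block = 128L * 1024 * 1024; // a common HDFS block size
        // Lowering the max below the block size yields more, smaller splits:
        System.out.println(splitSize(block, 1L, 32L * 1024 * 1024));            // 32 MB
        // Raising the min above the block size yields fewer, larger splits:
        System.out.println(splitSize(block, 256L * 1024 * 1024, Long.MAX_VALUE)); // 256 MB
      }
    }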

Modified: hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java (original)
+++ hive/branches/branch-0.13/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java Wed Mar 12 22:54:18 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
  import java.util.List;

  import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.hive.shims.ShimLoader;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.hadoop.mapreduce.InputSplit;
@@ -44,7 +45,9 @@ public class RCFileMapReduceInputFormat<
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {

-    job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL);
+    job.getConfiguration().setLong(
+        ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+        SequenceFile.SYNC_INTERVAL);
      return super.getSplits(job);
    }
  }

Modified: hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java (original)
+++ hive/branches/branch-0.13/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java Wed Mar 12 22:54:18 2014
@@ -228,7 +228,8 @@ public class TestRCFileMapReduceInputFor
      Configuration jonconf = new Configuration(cloneConf);
      jonconf.set("mapred.input.dir", testDir.toString());
      JobContext context = new Job(jonconf);
-    context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize);
+    context.getConfiguration().setLong(
+        ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), maxSplitSize);
      List<InputSplit> splits = inputFormat.getSplits(context);
      assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber);
      int readCount = 0;

Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Wed Mar 12 22:54:18 2014
@@ -284,7 +284,9 @@ public class HiveInputFormat<K extends W
        if (headerCount != 0 || footerCount != 0) {

        // Input file has header or footer; it cannot be split.
-        conf.setLong("mapred.min.split.size", Long.MAX_VALUE);
+        conf.setLong(
+            ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+            Long.MAX_VALUE);
        }
      }
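
Setting the minimum split size to Long.MAX_VALUE works because of the same max(minSize, min(maxSize, blockSize)) computation sketched earlier: the min term dominates everything else, so the whole header/footer file goes to a single mapper. A tiny check, under that same assumption:

    public class UnsplittableCheck {
      public static void main(String[] args) {
        long blockSize = 128L * 1024 * 1024;
        long maxSize = 256L * 1024 * 1024;
        long split = Math.max(Long.MAX_VALUE, Math.min(maxSize, blockSize));
        System.out.println(split == Long.MAX_VALUE); // true: one split per file
      }
    }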


Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Mar 12 22:54:18 2014
@@ -77,8 +77,10 @@ public class OrcInputFormat implements
    VectorizedOrcInputFormat voif = new VectorizedOrcInputFormat();

    private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
-  static final String MIN_SPLIT_SIZE = "mapred.min.split.size";
-  static final String MAX_SPLIT_SIZE = "mapred.max.split.size";
+  static final String MIN_SPLIT_SIZE =
+      ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE");
+  static final String MAX_SPLIT_SIZE =
+      ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE");

    private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
    private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
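
Since the shim lookup happens in a static initializer, the resolved names are fixed for the lifetime of the class. A hedged sketch of how these constants are presumably consumed (the helper methods are illustrative, not OrcInputFormat's actual API):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.shims.ShimLoader;

    class OrcSplitConfigSketch {
      private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
      private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;

      // Illustrative helpers: read the version-appropriate keys, falling
      // back to the 16 MB / 256 MB defaults shown in the diff above.
      static long minSplitSize(Configuration conf) {
        return conf.getLong(
            ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
            DEFAULT_MIN_SPLIT_SIZE);
      }

      static long maxSplitSize(Configuration conf) {
        return conf.getLong(
            ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"),
            DEFAULT_MAX_SPLIT_SIZE);
      }

      public static void main(String[] args) {
        Configuration conf = new Configuration();
        System.out.println(minSplitSize(conf) + " / " + maxSplitSize(conf));
      }
    }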

Modified: hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java (original)
+++ hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/TestRCFile.java Wed Mar 12 22:54:18 2014
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.serde2.obj
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
  import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.shims.ShimLoader;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
@@ -640,7 +641,9 @@ public class TestRCFile {
      RCFileInputFormat inputFormat = new RCFileInputFormat();
      JobConf jobconf = new JobConf(cloneConf);
      jobconf.set("mapred.input.dir", testDir.toString());
-    jobconf.setLong("mapred.min.split.size", fileLen);
+    jobconf.setLong(
+        ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+        fileLen);
      InputSplit[] splits = inputFormat.getSplits(jobconf, 1);
      RCFileRecordReader rr = new RCFileRecordReader(jobconf, (FileSplit)splits[0]);
      long lastSync = 0;
@@ -707,7 +710,9 @@ public class TestRCFile {
      RCFileInputFormat inputFormat = new RCFileInputFormat();
      JobConf jonconf = new JobConf(cloneConf);
      jonconf.set("mapred.input.dir", testDir.toString());
-    jonconf.setLong("mapred.min.split.size", minSplitSize);
+    jonconf.setLong(
+        ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
+        minSplitSize);
      InputSplit[] splits = inputFormat.getSplits(jonconf, splitNumber);
      assertEquals("splits length should be " + splitNumber, splits.length, splitNumber);
      int readCount = 0;

Modified: hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java (original)
+++ hive/branches/branch-0.13/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java Wed Mar 12 22:54:18 2014
@@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.udf.gen
  import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.shims.ShimLoader;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.InputFormat;
@@ -106,8 +107,8 @@ public class TestOrcSplitElimination {
          100000, CompressionKind.NONE, 10000, 10000);
      writeData(writer);
      writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "5000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "5000");
      InputFormat<?, ?> in = new OrcInputFormat();
      FileInputFormat.setInputPaths(conf, testFilePath.toString());

@@ -184,8 +185,8 @@ public class TestOrcSplitElimination {
          100000, CompressionKind.NONE, 10000, 10000);
      writeData(writer);
      writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "150000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
      InputFormat<?, ?> in = new OrcInputFormat();
      FileInputFormat.setInputPaths(conf, testFilePath.toString());

@@ -273,8 +274,8 @@ public class TestOrcSplitElimination {
          100000, CompressionKind.NONE, 10000, 10000);
      writeData(writer);
      writer.close();
- conf.set("mapred.min.split.size", "1000");
- conf.set("mapred.max.split.size", "150000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), "1000");
+ conf.set(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), "150000");
      InputFormat<?, ?> in = new OrcInputFormat();
      FileInputFormat.setInputPaths(conf, testFilePath.toString());


Modified: hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (original)
+++ hive/branches/branch-0.13/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java Wed Mar 12 22:54:18 2014
@@ -778,8 +778,8 @@ public class Hadoop20Shims implements Ha
      ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive");
      ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size");
      ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node");
      ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks");
      ret.put("HADOOPJOBNAME", "mapred.job.name");
      ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapred.reduce.tasks.speculative.execution");

Modified: hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java (original)
+++ hive/branches/branch-0.13/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java Wed Mar 12 22:54:18 2014
@@ -404,8 +404,8 @@ public class Hadoop20SShims extends Hado
      ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive");
      ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size");
      ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack");
      ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks");
      ret.put("HADOOPJOBNAME", "mapred.job.name");
      ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapred.reduce.tasks.speculative.execution");

Modified: hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java?rev=1576978&r1=1576977&r2=1576978&view=diff
==============================================================================
--- hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (original)
+++ hive/branches/branch-0.13/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java Wed Mar 12 22:54:18 2014
@@ -568,8 +568,8 @@ public class Hadoop23Shims extends Hadoo
      ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapreduce.input.fileinputformat.input.dir.recursive");
      ret.put("MAPREDMAXSPLITSIZE", "mapreduce.input.fileinputformat.split.maxsize");
      ret.put("MAPREDMINSPLITSIZE", "mapreduce.input.fileinputformat.split.minsize");
- ret.put("MAPREDMINSPLITSIZEPERNODE", "mapreduce.input.fileinputformat.split.minsize.per.rack");
- ret.put("MAPREDMINSPLITSIZEPERRACK", "mapreduce.input.fileinputformat.split.minsize.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERNODE", "mapreduce.input.fileinputformat.split.minsize.per.node");
+ ret.put("MAPREDMINSPLITSIZEPERRACK", "mapreduce.input.fileinputformat.split.minsize.per.rack");
      ret.put("HADOOPNUMREDUCERS", "mapreduce.job.reduces");
      ret.put("HADOOPJOBNAME", "mapreduce.job.name");
      ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapreduce.reduce.speculative");
