FAQ
Author: xuefu
Date: Thu Dec 18 18:54:04 2014
New Revision: 1646510

URL: http://svn.apache.org/r1646510
Log:
HIVE-9127: Improve CombineHiveInputFormat.getSplit performance (Brock via Xuefu)

Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Dec 18 18:54:04 2014
@@ -419,8 +419,9 @@ public final class Utilities {
        LOG.info("No plan file found: "+path);
        return null;
      } catch (Exception e) {
- LOG.error("Failed to load plan: "+path, e);
- throw new RuntimeException(e);
+ String msg = "Failed to load plan: " + path + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
      } finally {
        if (in != null) {
          try {
@@ -702,11 +703,11 @@ public final class Utilities {

        // Cache the plan in this process
        gWorkMap.put(planPath, w);
-
        return planPath;
      } catch (Exception e) {
- e.printStackTrace();
- throw new RuntimeException(e);
+ String msg = "Error caching " + name + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
      }
    }


Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Dec 18 18:54:04 2014
@@ -82,8 +82,9 @@ public class CombineHiveInputFormat<K ex
     */
    public static class CombineHiveInputSplit extends InputSplitShim {

- String inputFormatClassName;
- CombineFileSplit inputSplitShim;
+ private String inputFormatClassName;
+ private CombineFileSplit inputSplitShim;
+ private Map<String, PartitionDesc> pathToPartitionInfo;

      public CombineHiveInputSplit() throws IOException {
        this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -93,20 +94,25 @@ public class CombineHiveInputFormat<K ex
      public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException {
        this(inputSplitShim.getJob(), inputSplitShim);
      }
-
      public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim)
          throws IOException {
+ this(job, inputSplitShim, null);
+ }
+ public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
+ Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
        this.inputSplitShim = inputSplitShim;
+ this.pathToPartitionInfo = pathToPartitionInfo;
        if (job != null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(job).getPathToPartitionInfo();
+ if (this.pathToPartitionInfo == null) {
+ this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+ }

          // extract all the inputFormatClass names for each chunk in the
          // CombinedSplit.
          Path[] ipaths = inputSplitShim.getPaths();
          if (ipaths.length > 0) {
            PartitionDesc part = HiveFileFormatUtils
- .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+ .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
                    ipaths[0], IOPrepareCache.get().getPartitionDescMap());
            inputFormatClassName = part.getInputFileFormatClass().getName();
          }
@@ -215,8 +221,9 @@ public class CombineHiveInputFormat<K ex
        inputSplitShim.write(out);

        if (inputFormatClassName == null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(getJob()).getPathToPartitionInfo();
+ if (pathToPartitionInfo == null) {
+ pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
+ }

          // extract all the inputFormatClass names for each chunk in the
          // CombinedSplit.
@@ -268,8 +275,8 @@ public class CombineHiveInputFormat<K ex
    /**
     * Create Hive splits based on CombineFileSplit.
     */
- private InputSplit[] getCombineSplits(JobConf job,
- int numSplits) throws IOException {
+ private InputSplit[] getCombineSplits(JobConf job, int numSplits, Map<String, PartitionDesc> pathToPartitionInfo)
+ throws IOException {
      PerfLogger perfLogger = PerfLogger.getPerfLogger();
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
      init(job);
@@ -438,7 +445,7 @@ public class CombineHiveInputFormat<K ex
      }

      for (CombineFileSplit is : iss) {
- CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
+ CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
        result.add(csplit);
      }

@@ -505,7 +512,8 @@ public class CombineHiveInputFormat<K ex
      if (combinablePaths.size() > 0) {
        FileInputFormat.setInputPaths(job, combinablePaths.toArray
            (new Path[combinablePaths.size()]));
- InputSplit[] splits = getCombineSplits(job, numSplits);
+ Map<String, PartitionDesc> pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+ InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
        for (InputSplit split : splits) {
          result.add(split);
        }

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Dec 18, '14 at 6:54p
active: Dec 18, '14 at 6:54p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Xuefu: 1 post

People

Translate

site design / logo © 2021 Grokbase