Grokbase Groups Hive commits May 2015
FAQ
Repository: hive
Updated Branches:
   refs/heads/master cbd09253d -> 5afdea966


HIVE-10793 : Hybrid Hybrid Grace Hash Join : Don't allocate all hash table memory upfront (Mostafa Mokhtar, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5afdea96
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5afdea96
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5afdea96

Branch: refs/heads/master
Commit: 5afdea966ced7ca8820fc50b8130da07c96ec4b7
Parents: cbd0925
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Tue May 26 12:22:27 2015 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Tue May 26 12:22:27 2015 -0700

----------------------------------------------------------------------
  .../org/apache/hadoop/hive/conf/HiveConf.java | 6 ++---
  .../persistence/HybridHashTableContainer.java | 25 +++++++++++---------
  2 files changed, 17 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5afdea96/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index eff4d30..49b8f97 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -772,10 +772,10 @@ public class HiveConf extends Configuration {
          "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
          "This number should be power of 2."),
      HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" +
- " hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
+ "Hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
      HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" +
- " hybrid grace hash join, the minimum number of partitions to create."),
- HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024,
+ "Hybrid grace hash join, the minimum number of partitions to create."),
+ HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024,
          "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
          "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
          "joins unnecessary memory will be allocated and then trimmed."),

http://git-wip-us.apache.org/repos/asf/hive/blob/5afdea96/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index f80ffc5..cf4c71d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -217,20 +217,20 @@ public class HybridHashTableContainer

    public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
                                    long estimatedTableSize, HybridHashTableConf nwayConf)
- throws SerDeException, IOException {
+ throws SerDeException, IOException {
      this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
- HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
- estimatedTableSize, keyCount, memoryAvailable, nwayConf);
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
+ HiveConf.getFloatVar(hconf,HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
+ HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
+ HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+ HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
+ HiveConf.getIntVar(hconf,HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+ estimatedTableSize, keyCount, memoryAvailable, nwayConf);
    }

    private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor,
- int memCheckFreq, int minWbSize, int minNumParts,
- long estimatedTableSize, long keyCount,
- long memoryAvailable, HybridHashTableConf nwayConf)
+ int memCheckFreq, int minWbSize, int maxWbSize, int minNumParts, long estimatedTableSize,
+ long keyCount, long memoryAvailable, HybridHashTableConf nwayConf)
        throws SerDeException, IOException {
      directWriteHelper = new MapJoinBytesTableContainer.DirectKeyValueWriter();

@@ -269,8 +269,11 @@ public class HybridHashTableContainer
          writeBufferSize = (int)(memoryThreshold / numPartitions);
        }
      }
- writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
+
+ // Cap WriteBufferSize to avoid large preallocations
+ writeBufferSize = writeBufferSize < minWbSize ? minWbSize : Math.min(maxWbSize, writeBufferSize);
      LOG.info("Write buffer size: " + writeBufferSize);
+
      hashPartitions = new HashPartition[numPartitions];
      int numPartitionsSpilledOnCreation = 0;
      memoryUsed = 0;

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: May 26, '15 at 7:22p
active: May 26, '15 at 7:22p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Sershe: 1 post

People

Translate

site design / logo © 2021 Grokbase