FAQ
Repository: hive
Updated Branches:
   refs/heads/master d8705a12f -> 55383d815


HIVE-13217 : Replication for HoS mapjoin small file needs to respect dfs.replication.max (Chinna Rao L, via Szehon Ho)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55383d81
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55383d81
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55383d81

Branch: refs/heads/master
Commit: 55383d815420b9d1e8ad364dd2efa8ac894abcb5
Parents: d8705a1
Author: Chinna Rao L <chinnaraol@apache.org>
Authored: Thu Mar 24 14:03:16 2016 +0530
Committer: Chinna Rao L <chinnaraol@apache.org>
Committed: Thu Mar 24 14:03:16 2016 +0530

----------------------------------------------------------------------
  .../hadoop/hive/ql/exec/SparkHashTableSinkOperator.java | 8 ++++++--
  1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/55383d81/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
index 85344fc..5837614 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
@@ -47,11 +47,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

  public class SparkHashTableSinkOperator
      extends TerminalOperator<SparkHashTableSinkDesc> implements Serializable {
- private static final int MIN_REPLICATION = 10;
    private static final long serialVersionUID = 1L;
    private final String CLASS_NAME = this.getClass().getName();
    private final PerfLogger perfLogger = SessionState.getPerfLogger();
    protected static final Logger LOG = LoggerFactory.getLogger(SparkHashTableSinkOperator.class.getName());
+ public static final String DFS_REPLICATION_MAX = "dfs.replication.max";
+ private int minReplication = 10;

    private final HashTableSinkOperator htsOperator;

@@ -73,6 +74,9 @@ public class SparkHashTableSinkOperator
      byte tag = conf.getTag();
      inputOIs[tag] = inputObjInspectors[0];
      conf.setTagOrder(new Byte[]{ tag });
+ int dfsMaxReplication = hconf.getInt(DFS_REPLICATION_MAX, minReplication);
+ // minReplication value should not cross the value of dfs.replication.max
+ minReplication = Math.min(minReplication, dfsMaxReplication);
      htsOperator.setConf(conf);
      htsOperator.initialize(hconf, inputOIs);
    }
@@ -151,7 +155,7 @@ public class SparkHashTableSinkOperator
      }
      // TODO find out numOfPartitions for the big table
      int numOfPartitions = replication;
- replication = (short) Math.max(MIN_REPLICATION, numOfPartitions);
+ replication = (short) Math.max(minReplication, numOfPartitions);
      htsOperator.console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag
        + " with group count: " + tableContainer.size() + " into file: " + path);
      // get the hashtable file and path

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits@
categories: hive, hadoop
posted: Mar 24, '16 at 8:35a
active: Mar 24, '16 at 8:35a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Chinnaraol: 1 post

People

Translate

site design / logo © 2021 Grokbase