Repository: hive
Updated Branches:
   refs/heads/master 77474581d -> 761b5471a


HIVE-12992: Hive on tez: Bucket map join plan is incorrect (Vikram Dixit K, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/761b5471
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/761b5471
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/761b5471

Branch: refs/heads/master
Commit: 761b5471a0abbbb38ee35a715ea2d4e6d268d5a9
Parents: 7747458
Author: vikram <vikram@hortonworks.com>
Authored: Mon Mar 28 11:25:11 2016 -0700
Committer: vikram <vikram@hortonworks.com>
Committed: Mon Mar 28 11:37:32 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/OperatorUtils.java  |  45 ++-
 .../ql/optimizer/ReduceSinkMapJoinProc.java |  24 +-
 .../clientpositive/bucket_map_join_tez1.q   |  27 ++
 .../llap/bucket_map_join_tez1.q.out         | 308 +++++++++++++++++++
 .../spark/bucket_map_join_tez1.q.out        | 306 ++++++++++++++++++
 .../tez/bucket_map_join_tez1.q.out          | 294 ++++++++++++++++++
 6 files changed, 985 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
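
(In brief: the patch adds a join-accounted upstream operator search to
OperatorUtils and switches ReduceSinkMapJoinProc over to it when classifying
the incoming edge of a bucket map join, so only the big-table lineage of a
map join is examined. New bucket_map_join_tez1 queries and llap/spark/tez
golden files cover the case where the big side of a bucket map join is
itself the result of an earlier map join.)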


http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index 3d664c1..41507b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -26,6 +26,7 @@ import java.util.Map;
  import java.util.Set;

  import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
  import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.slf4j.Logger;
@@ -80,6 +81,11 @@ public class OperatorUtils {
      return found.size() == 1 ? found.iterator().next() : null;
    }

+  public static <T> T findSingleOperatorUpstreamJoinAccounted(Operator<?> start, Class<T> clazz) {
+    Set<T> found = findOperatorsUpstreamJoinAccounted(start, clazz, new HashSet<T>());
+    return found.size() == 1 ? found.iterator().next() : null;
+  }
+
    public static <T> Set<T> findOperatorsUpstream(Collection<Operator<?>> starts, Class<T> clazz) {
      Set<T> found = new HashSet<T>();
      for (Operator<?> start : starts) {
@@ -101,6 +107,34 @@ public class OperatorUtils {
      return found;
    }

+  public static <T> Set<T> findOperatorsUpstreamJoinAccounted(Operator<?> start, Class<T> clazz,
+      Set<T> found) {
+    if (clazz.isInstance(start)) {
+      found.add((T) start);
+    }
+    int onlyIncludeIndex = -1;
+    if (start instanceof AbstractMapJoinOperator) {
+      AbstractMapJoinOperator mapJoinOp = (AbstractMapJoinOperator) start;
+      MapJoinDesc desc = (MapJoinDesc) mapJoinOp.getConf();
+      onlyIncludeIndex = desc.getPosBigTable();
+    }
+    if (start.getParentOperators() != null) {
+      int i = 0;
+      for (Operator<?> parent : start.getParentOperators()) {
+        if (onlyIncludeIndex >= 0) {
+          if (onlyIncludeIndex == i) {
+            findOperatorsUpstream(parent, clazz, found);
+          }
+        } else {
+          findOperatorsUpstream(parent, clazz, found);
+        }
+        i++;
+      }
+    }
+    return found;
+  }
+
+
    public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
      if (childOperators == null) {
        return;
@@ -202,7 +236,7 @@ public class OperatorUtils {
    }

    public static boolean sameRowSchema(Operator<?> operator1, Operator<?> operator2) {
- return operator1.getSchema().equals(operator2.getSchema());
+ return operator1.getSchema().equals(operator2.getSchema());
    }

    /**
@@ -220,9 +254,9 @@ public class OperatorUtils {
     * them
     */
    public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperators(
- Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
+ Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
      ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
- new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
+ new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
      List<Operator<?>> ops = new ArrayList<Operator<?>>();
      ops.add(start);
      while (!ops.isEmpty()) {
@@ -255,9 +289,9 @@ public class OperatorUtils {
     * them
     */
    public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperatorsUpstream(
- Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
+ Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
      ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
- new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
+ new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
      List<Operator<?>> ops = new ArrayList<Operator<?>>();
      ops.add(start);
      while (!ops.isEmpty()) {
@@ -296,5 +330,4 @@ public class OperatorUtils {
      }
      return numberOperators;
    }
-
  }
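
For readers skimming the patch, a minimal standalone sketch of the traversal
the new findOperatorsUpstreamJoinAccounted helper performs: walk upstream
through the operator DAG, but at a map join follow only the parent at the
big-table position, so a match on a small-table branch is never reported.
The Node class and names below are hypothetical stand-ins, not Hive's
Operator API; note also that the committed helper delegates the recursive
step to the plain findOperatorsUpstream, while this sketch recurses into
itself for brevity.

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Hypothetical stand-in for Hive's operator DAG: each node knows its
    // parents, and a map-join node records which parent holds the big table.
    class Node {
      final String name;
      final List<Node> parents = new ArrayList<>();
      int bigTablePos = -1; // >= 0 marks a map join (index of big-table parent)

      Node(String name) { this.name = name; }

      @Override
      public String toString() { return name; }
    }

    public class JoinAccountedSearch {
      // Walk upstream; at a map join only the big-table parent is followed.
      static Set<Node> findUpstream(Node start, String target, Set<Node> found) {
        if (start.name.equals(target)) {
          found.add(start);
        }
        int i = 0;
        for (Node parent : start.parents) {
          if (start.bigTablePos < 0 || start.bigTablePos == i) {
            findUpstream(parent, target, found);
          }
          i++;
        }
        return found;
      }

      public static void main(String[] args) {
        Node bigScan = new Node("TS_big");
        Node smallScan = new Node("TS_small");
        Node mapJoin = new Node("MAPJOIN");
        mapJoin.parents.add(bigScan);   // position 0: big table
        mapJoin.parents.add(smallScan); // position 1: small table
        mapJoin.bigTablePos = 0;

        System.out.println(findUpstream(mapJoin, "TS_big", new HashSet<>()));   // [TS_big]
        System.out.println(findUpstream(mapJoin, "TS_small", new HashSet<>())); // []
      }
    }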

http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
index 1e8f30e..00afc18 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
@@ -220,8 +220,8 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
        tableSize = 1;
      }
      LOG.info("Mapjoin " + mapJoinOp + "(bucket map join = )" + joinConf.isBucketMapJoin()
- + ", pos: " + pos + " --> " + parentWork.getName() + " (" + keyCount
- + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
+ + ", pos: " + pos + " --> " + parentWork.getName() + " (" + keyCount
+ + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
      joinConf.getParentToInput().put(pos, parentWork.getName());
      if (keyCount != Long.MAX_VALUE) {
        joinConf.getParentKeyCounts().put(pos, keyCount);
@@ -247,10 +247,9 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
         * 4. If we don't find a table scan operator, it has to be a reduce side operation.
         */
        if (mapJoinWork == null) {
-        Operator<?> rootOp =
-            OperatorUtils.findSingleOperatorUpstream(
-                mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
-                ReduceSinkOperator.class);
+        Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(
+            mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
+            ReduceSinkOperator.class);
          if (rootOp == null) {
            // likely we found a table scan operator
            edgeType = EdgeType.CUSTOM_EDGE;
@@ -259,10 +258,9 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
            edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
          }
        } else {
-        Operator<?> rootOp =
-            OperatorUtils.findSingleOperatorUpstream(
-                mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
-                TableScanOperator.class);
+        Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(
+            mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
+            TableScanOperator.class);
          if (rootOp != null) {
            // likely we found a table scan operator
            edgeType = EdgeType.CUSTOM_EDGE;
@@ -320,7 +318,7 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
      context.linkOpWithWorkMap.put(mapJoinOp, linkWorkMap);

      List<ReduceSinkOperator> reduceSinks
- = context.linkWorkWithReduceSinkMap.get(parentWork);
+ = context.linkWorkWithReduceSinkMap.get(parentWork);
      if (reduceSinks == null) {
        reduceSinks = new ArrayList<ReduceSinkOperator>();
      }
@@ -358,7 +356,7 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
      // let the dummy op be the parent of mapjoin op
      mapJoinOp.replaceParent(parentRS, dummyOp);
      List<Operator<? extends OperatorDesc>> dummyChildren =
- new ArrayList<Operator<? extends OperatorDesc>>();
+ new ArrayList<Operator<? extends OperatorDesc>>();
      dummyChildren.add(mapJoinOp);
      dummyOp.setChildOperators(dummyChildren);
      dummyOperators.add(dummyOp);
@@ -384,4 +382,4 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {

      return true;
    }
-}
+}
\ No newline at end of file
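
The functional change in this file is just which upstream search is used;
the edge-type decision itself is untouched. For orientation, a condensed,
illustrative restatement of that decision (the boolean parameters summarize
the upstream search results and are not Hive's actual signatures):

    // Condensed restatement of the edge-type choice around the changed lines.
    public class EdgeChoice {
      enum EdgeType { CUSTOM_EDGE, CUSTOM_SIMPLE_EDGE }

      static EdgeType choose(boolean mapJoinWorkKnown,
                             boolean reduceSinkUpstream,
                             boolean tableScanUpstream) {
        if (!mapJoinWorkKnown) {
          // No ReduceSink upstream of the big-table parent: likely a table
          // scan, so keep the bucketed CUSTOM_EDGE.
          return reduceSinkUpstream ? EdgeType.CUSTOM_SIMPLE_EDGE
                                    : EdgeType.CUSTOM_EDGE;
        }
        // Work already known: a TableScan upstream keeps the bucketed edge.
        return tableScanUpstream ? EdgeType.CUSTOM_EDGE
                                 : EdgeType.CUSTOM_SIMPLE_EDGE;
      }

      public static void main(String[] args) {
        // Nested bucket map join: the big-table parent chains through an
        // earlier map join down to a table scan.
        System.out.println(choose(true, false, true)); // CUSTOM_EDGE
      }
    }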

http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
index 8ed630e..95585db 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
@@ -40,6 +40,33 @@ select count(*)
  from
  (select distinct key, value from tab_part) a join tab b on a.key = b.key;

+explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key;
+
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key;
+
+explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
+
+
  -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
  -- In this case the sub-query is chosen as the big table.
  explain
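
(The added queries exercise the scenario behind HIVE-12992: the big-table
side of a bucket map join is itself produced by an earlier map join, in both
join orders. With the plain upstream search, a match found on the
small-table branch of the inner join could apparently misclassify the edge;
the join-accounted search inspects only the big-table lineage, preserving
the bucketed plan. The golden-file diffs below show the resulting plans.)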

http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index 21cfa5c..204da88 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -325,6 +325,314 @@ POSTHOOK: Input: default@tab_part
  POSTHOOK: Input: default@tab_part@ds=2008-04-08
  #### A masked pattern was here ####
  242
+PREHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
+PREHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
  PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
  -- In this case the sub-query is chosen as the big table.
  explain

http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 4899c3a..2d66d35 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -326,6 +326,312 @@ POSTHOOK: Input: default@tab_part
  POSTHOOK: Input: default@tab_part@ds=2008-04-08
  #### A masked pattern was here ####
  242
+PREHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 3 <- Map 2 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
+PREHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 4 <- Map 3 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
  PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
  -- In this case the sub-query is chosen as the big table.
  explain

http://git-wip-us.apache.org/repos/asf/hive/blob/761b5471/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
index 2e10157..30c4107 100644
--- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
@@ -315,6 +315,300 @@ POSTHOOK: Input: default@tab_part
  POSTHOOK: Input: default@tab_part@ds=2008-04-08
  #### A masked pattern was here ####
  242
+PREHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
+join
+tab_part d on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
+PREHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 0 Map 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from
+tab_part d
+join
+(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+1166
  PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
  -- In this case the sub-query is chosen as the big table.
  explain
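
(Across the three golden files the plans for the new queries keep bucketed
CUSTOM_EDGE inputs into the vertex that chains the two Map Join Operators on
Tez and LLAP, while the Spark variant expresses the same join as two
HashTable-sink stages; both join orders return the same count, 1166.)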

  • Vikram at Mar 28, 2016 at 6:48 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-2.0 95c2b6b9f -> fdbed3e50


    HIVE-12992: Hive on tez: Bucket map join plan is incorrect (Vikram Dixit K, reviewed by Jason Dere)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fdbed3e5
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fdbed3e5
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fdbed3e5

    Branch: refs/heads/branch-2.0
    Commit: fdbed3e503bddca01ed3f590cc9fec20130caf4b
    Parents: 95c2b6b
    Author: vikram <vikram@hortonworks.com>
    Authored: Mon Mar 28 11:25:11 2016 -0700
    Committer: vikram <vikram@hortonworks.com>
    Committed: Mon Mar 28 11:48:02 2016 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/exec/OperatorUtils.java  |  45 ++-
      .../ql/optimizer/ReduceSinkMapJoinProc.java |  24 +-
      .../clientpositive/bucket_map_join_tez1.q   |  27 ++
      .../llap/bucket_map_join_tez1.q.out         | 308 +++++++++++++++++++
      .../spark/bucket_map_join_tez1.q.out        | 306 ++++++++++++++++++
      .../tez/bucket_map_join_tez1.q.out          | 294 ++++++++++++++++++
      6 files changed, 985 insertions(+), 19 deletions(-)
    ----------------------------------------------------------------------


    + Execution mode: llap
    + LLAP IO: no inputs
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 2
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Execution mode: llap
    + LLAP IO: no inputs
    + Reducer 4
    + Execution mode: llap
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
      PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain
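
    The CUSTOM_EDGE entries in the plan above are the heart of the fix: under a
    bucket map join, each task of the joining vertex loads only the matching
    bucket of each bucketed input instead of a full broadcast copy. As a toy
    illustration of that per-bucket routing rule (the function and numbers
    below are hypothetical, not Tez's API):

    import java.util.List;

    // Illustrative toy of what a CUSTOM_EDGE buys in the plans above: bucket b
    // of a bucketed input is routed only to task b of the joining vertex,
    // because both sides hash the join key the same way.
    class CustomEdgeSketch {
      static int targetTask(int key, int numBuckets) {
        return Math.floorMod(key, numBuckets);
      }

      public static void main(String[] args) {
        int buckets = 4;
        for (int key : List.of(3, 7, 10)) {
          System.out.printf("key %d -> task %d%n", key, targetTask(key, buckets));
        }
        // key 3 -> task 3, key 7 -> task 3, key 10 -> task 2
      }
    }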

    http://git-wip-us.apache.org/repos/asf/hive/blob/fdbed3e5/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    index b5e7846..302a2c2 100644
    --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    @@ -326,6 +326,312 @@ POSTHOOK: Input: default@tab_part
      POSTHOOK: Input: default@tab_part@ds=2008-04-08
      #### A masked pattern was here ####
      242
    +PREHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-2 is a root stage
    + Stage-1 depends on stages: Stage-2
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-2
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 3 <- Map 2 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 4
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Local Work:
    + Map Reduce Local Work
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
    +PREHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-2 is a root stage
    + Stage-1 depends on stages: Stage-2
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-2
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 4 <- Map 3 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 2
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Local Work:
    + Map Reduce Local Work
    + Reducer 4
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
      PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain
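
    The Spark variant of the same plan splits the work across stages: Stage-2
    materializes the small sides into in-memory hash tables (the Spark
    HashTable Sink Operators), and Stage-1 streams the remaining table through
    the two chained Map Join Operators that probe them. A minimal
    build-then-probe sketch of that shape (illustrative names and data, not
    Spark's or Hive's API):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    class BuildProbeSketch {
      public static void main(String[] args) {
        // "Stage-2": build side -> hash table keyed on the join column,
        // with a multiplicity count per key.
        Map<Integer, Integer> build = new HashMap<>();
        for (int key : List.of(1, 2, 3)) {
          build.merge(key, 1, Integer::sum);
        }

        // "Stage-1": probe with the big-table rows; summing multiplicities
        // mirrors the count(*) the queries above compute.
        long count = 0;
        for (int key : List.of(2, 3, 3, 5)) {
          count += build.getOrDefault(key, 0);
        }
        System.out.println(count);  // prints: 3
      }
    }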

    http://git-wip-us.apache.org/repos/asf/hive/blob/fdbed3e5/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    index 55c0aed..377eb16 100644
    --- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    @@ -315,6 +315,300 @@ POSTHOOK: Input: default@tab_part
      POSTHOOK: Input: default@tab_part@ds=2008-04-08
      #### A masked pattern was here ####
      242
    +PREHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Edges:
    + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 4
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
    +PREHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Edges:
    + Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE)
    + Reducer 4 <- Map 3 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 2
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reducer 4
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
      PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain
  • Vikram at Mar 28, 2016 at 6:53 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1.2 510ef503b -> 0c5d33951


    HIVE-12992: Hive on tez: Bucket map join plan is incorrect (Vikram Dixit K, reviewed by Jason Dere)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0c5d3395
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0c5d3395
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0c5d3395

    Branch: refs/heads/branch-1.2
    Commit: 0c5d339515c80b575801b8aa005c1b5f298c1aaf
    Parents: 510ef50
    Author: vikram <vikram@hortonworks.com>
    Authored: Mon Mar 28 11:25:11 2016 -0700
    Committer: vikram <vikram@hortonworks.com>
    Committed: Mon Mar 28 11:50:16 2016 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/exec/OperatorUtils.java | 44 ++-
      .../ql/optimizer/ReduceSinkMapJoinProc.java | 24 +-
      .../clientpositive/bucket_map_join_tez1.q | 27 ++
      .../spark/bucket_map_join_tez1.q.out | 306 +++++++++++++++++++
      .../tez/bucket_map_join_tez1.q.out | 294 ++++++++++++++++++
      5 files changed, 677 insertions(+), 18 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/0c5d3395/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
    index f00fc77..cc878dc 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
    @@ -28,6 +28,7 @@ import java.util.Set;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
    +import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
      import org.apache.hadoop.hive.ql.plan.OperatorDesc;
      import org.apache.hadoop.mapred.OutputCollector;

    @@ -80,6 +81,11 @@ public class OperatorUtils {
          return found.size() == 1 ? found.iterator().next() : null;
        }

    +  public static <T> T findSingleOperatorUpstreamJoinAccounted(Operator<?> start, Class<T> clazz) {
    +    Set<T> found = findOperatorsUpstreamJoinAccounted(start, clazz, new HashSet<T>());
    +    return found.size() == 1 ? found.iterator().next(): null;
    +  }
    +
        public static <T> Set<T> findOperatorsUpstream(Collection<Operator<?>> starts, Class<T> clazz) {
          Set<T> found = new HashSet<T>();
          for (Operator<?> start : starts) {
    @@ -101,6 +107,34 @@ public class OperatorUtils {
          return found;
        }

    +  public static <T> Set<T> findOperatorsUpstreamJoinAccounted(Operator<?> start, Class<T> clazz,
    +      Set<T> found) {
    +    if (clazz.isInstance(start)) {
    +      found.add((T) start);
    +    }
    +    int onlyIncludeIndex = -1;
    +    if (start instanceof AbstractMapJoinOperator) {
    +      AbstractMapJoinOperator mapJoinOp = (AbstractMapJoinOperator) start;
    +      MapJoinDesc desc = (MapJoinDesc) mapJoinOp.getConf();
    +      onlyIncludeIndex = desc.getPosBigTable();
    +    }
    +    if (start.getParentOperators() != null) {
    +      int i = 0;
    +      for (Operator<?> parent : start.getParentOperators()) {
    +        if (onlyIncludeIndex >= 0) {
    +          if (onlyIncludeIndex == i) {
    +            findOperatorsUpstream(parent, clazz, found);
    +          }
    +        } else {
    +          findOperatorsUpstream(parent, clazz, found);
    +        }
    +        i++;
    +      }
    +    }
    +    return found;
    +  }
    +
    +
        public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
          if (childOperators == null) {
            return;
    @@ -203,7 +237,7 @@ public class OperatorUtils {
        }

        public static boolean sameRowSchema(Operator<?> operator1, Operator<?> operator2) {
    - return operator1.getSchema().equals(operator2.getSchema());
    + return operator1.getSchema().equals(operator2.getSchema());
        }

        /**
    @@ -221,9 +255,9 @@ public class OperatorUtils {
         * them
         */
        public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperators(
    - Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
    + Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
          ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
    - new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
    + new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
          List<Operator<?>> ops = new ArrayList<Operator<?>>();
          ops.add(start);
          while (!ops.isEmpty()) {
    @@ -256,9 +290,9 @@ public class OperatorUtils {
         * them
         */
        public static Multimap<Class<? extends Operator<?>>, Operator<?>> classifyOperatorsUpstream(
    - Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
    + Operator<?> start, Set<Class<? extends Operator<?>>> classes) {
          ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>> resultMap =
    - new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
    + new ImmutableMultimap.Builder<Class<? extends Operator<?>>, Operator<?>>();
          List<Operator<?>> ops = new ArrayList<Operator<?>>();
          ops.add(start);
          while (!ops.isEmpty()) {
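
    A minimal, self-contained sketch of what the new join-accounted walk does
    differently from the plain upstream search (the Node type below is a
    hypothetical stand-in, not Hive's Operator/MapJoinDesc API): on reaching a
    map join, the traversal descends only into the parent at the big-table
    position, so operators sitting on small-table branches no longer surface
    as upstream matches.

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    class UpstreamWalkSketch {
      // Hypothetical operator node: a name, parent links, and (for map joins
      // only) the index of the big-table parent; -1 means "not a map join".
      static class Node {
        final String name;
        final List<Node> parents = new ArrayList<>();
        final int posBigTable;

        Node(String name, int posBigTable) {
          this.name = name;
          this.posBigTable = posBigTable;
        }
      }

      // Collect upstream nodes whose name matches, honoring big-table positions.
      static Set<Node> findUpstreamJoinAccounted(Node start, String match, Set<Node> found) {
        if (start.name.startsWith(match)) {
          found.add(start);
        }
        for (int i = 0; i < start.parents.size(); i++) {
          // At a map join, descend only into the big-table branch.
          if (start.posBigTable < 0 || start.posBigTable == i) {
            findUpstreamJoinAccounted(start.parents.get(i), match, found);
          }
        }
        return found;
      }

      public static void main(String[] args) {
        Node bigScan = new Node("TS_big", -1);
        Node smallSink = new Node("RS_small", -1);   // reduce sink on the small side
        Node mapJoin = new Node("MAPJOIN", 1);       // big table is parent index 1
        mapJoin.parents.add(smallSink);
        mapJoin.parents.add(bigScan);

        // The small-side reduce sink is skipped, so no "RS" match is found.
        Set<Node> hits = findUpstreamJoinAccounted(mapJoin, "RS", new HashSet<>());
        System.out.println(hits.isEmpty());          // prints: true
      }
    }

    In the toy graph, the reduce sink on the small side is invisible to the
    walk; that is the property the ReduceSinkMapJoinProc change below relies
    on when it classifies the big-table input.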

    http://git-wip-us.apache.org/repos/asf/hive/blob/0c5d3395/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
    index 4bbcafb..43b9b05 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
    @@ -170,8 +170,8 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
            tableSize = 1;
          }
          LOG.info("Mapjoin " + mapJoinOp + "(bucket map join = )" + joinConf.isBucketMapJoin()
    - + ", pos: " + pos + " --> " + parentWork.getName() + " (" + keyCount
    - + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
    + + ", pos: " + pos + " --> " + parentWork.getName() + " (" + keyCount
    + + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
          joinConf.getParentToInput().put(pos, parentWork.getName());
          if (keyCount != Long.MAX_VALUE) {
            joinConf.getParentKeyCounts().put(pos, keyCount);
    @@ -197,10 +197,9 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
             * 4. If we don't find a table scan operator, it has to be a reduce side operation.
             */
            if (mapJoinWork == null) {
    - Operator<?> rootOp =
    - OperatorUtils.findSingleOperatorUpstream(
    - mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
    - ReduceSinkOperator.class);
    + Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(
    + mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
    + ReduceSinkOperator.class);
              if (rootOp == null) {
                // likely we found a table scan operator
                edgeType = EdgeType.CUSTOM_EDGE;
    @@ -209,10 +208,9 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
                edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
              }
            } else {
    - Operator<?> rootOp =
    - OperatorUtils.findSingleOperatorUpstream(
    - mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
    - TableScanOperator.class);
    + Operator<?> rootOp = OperatorUtils.findSingleOperatorUpstreamJoinAccounted(
    + mapJoinOp.getParentOperators().get(joinConf.getPosBigTable()),
    + TableScanOperator.class);
              if (rootOp != null) {
                // likely we found a table scan operator
                edgeType = EdgeType.CUSTOM_EDGE;
    @@ -267,7 +265,7 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
          context.linkOpWithWorkMap.put(mapJoinOp, linkWorkMap);

          List<ReduceSinkOperator> reduceSinks
    - = context.linkWorkWithReduceSinkMap.get(parentWork);
    + = context.linkWorkWithReduceSinkMap.get(parentWork);
          if (reduceSinks == null) {
            reduceSinks = new ArrayList<ReduceSinkOperator>();
          }
    @@ -301,7 +299,7 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {
          // let the dummy op be the parent of mapjoin op
          mapJoinOp.replaceParent(parentRS, dummyOp);
          List<Operator<? extends OperatorDesc>> dummyChildren =
    - new ArrayList<Operator<? extends OperatorDesc>>();
    + new ArrayList<Operator<? extends OperatorDesc>>();
          dummyChildren.add(mapJoinOp);
          dummyOp.setChildOperators(dummyChildren);
          dummyOperators.add(dummyOp);
    @@ -327,4 +325,4 @@ public class ReduceSinkMapJoinProc implements NodeProcessor {

          return true;
        }
    -}
    +}
    \ No newline at end of file
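
    Why the traversal change matters here: ReduceSinkMapJoinProc probes what
    the big-table branch roots at in order to pick the Tez edge type for the
    map join input. A hedged sketch of that decision (the enum and the probe
    interface are illustrative stand-ins for the real operator-graph logic):

    import java.util.Optional;

    class EdgeTypeSketch {
      enum EdgeType { CUSTOM_EDGE, CUSTOM_SIMPLE_EDGE }

      // Pretend lookup: what does the big-table branch root at? ("TS" for a
      // table scan, "RS" for a reduce sink; empty if nothing conclusive.)
      interface BigTableProbe {
        Optional<String> rootOperator();
      }

      static EdgeType pickEdge(BigTableProbe probe) {
        // A big-table branch that bottoms out in a table scan can be routed
        // bucket-to-bucket (CUSTOM_EDGE); a reduce sink root means the input
        // is already shuffled, so a simple broadcast-style edge is used.
        return probe.rootOperator().filter("TS"::equals).isPresent()
            ? EdgeType.CUSTOM_EDGE
            : EdgeType.CUSTOM_SIMPLE_EDGE;
      }

      public static void main(String[] args) {
        System.out.println(pickEdge(() -> Optional.of("TS")));  // CUSTOM_EDGE
        System.out.println(pickEdge(() -> Optional.of("RS")));  // CUSTOM_SIMPLE_EDGE
      }
    }

    With the old walk, a reduce sink or table scan found down a small-table
    branch of an upstream map join could skew this probe and produce the
    incorrect bucket map join plan the JIRA describes; the join-accounted
    variants keep the probe on the big-table path.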

    http://git-wip-us.apache.org/repos/asf/hive/blob/0c5d3395/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
    index 494614d..92f81e4 100644
    --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
    +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q
    @@ -38,6 +38,33 @@ select count(*)
      from
      (select distinct key, value from tab_part) a join tab b on a.key = b.key;

    +explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key;
    +
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key;
    +
    +explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
    +
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;
    +
    +
      -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain

    http://git-wip-us.apache.org/repos/asf/hive/blob/0c5d3395/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    index 2c14065..360cc18 100644
    --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
    @@ -314,6 +314,312 @@ POSTHOOK: Input: default@tab_part
      POSTHOOK: Input: default@tab_part@ds=2008-04-08
      #### A masked pattern was here ####
      242
    +PREHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-2 is a root stage
    + Stage-1 depends on stages: Stage-2
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-2
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 3 <- Map 2 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 4
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Local Work:
    + Map Reduce Local Work
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
    +PREHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-2 is a root stage
    + Stage-1 depends on stages: Stage-2
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-2
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Spark HashTable Sink Operator
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 4 <- Map 3 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 2
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Local Work:
    + Map Reduce Local Work
    + Reducer 4
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
      PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain

    http://git-wip-us.apache.org/repos/asf/hive/blob/0c5d3395/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    index af5e6e6..278ca11 100644
    --- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out
    @@ -301,6 +301,300 @@ POSTHOOK: Input: default@tab_part
      POSTHOOK: Input: default@tab_part@ds=2008-04-08
      #### A masked pattern was here ####
      242
    +PREHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Edges:
    + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 4
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: b
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c
    +join
    +tab_part d on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
    +PREHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Edges:
    + Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE)
    + Reducer 4 <- Map 3 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: d
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + outputColumnNames: _col0
    + input vertices:
    + 0 Map 2
    + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 0 Map 1
    + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reducer 4
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tab
    +PREHOOK: Input: default@tab@ds=2008-04-08
    +PREHOOK: Input: default@tab_part
    +PREHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*)
    +from
    +tab_part d
    +join
    +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tab
    +POSTHOOK: Input: default@tab@ds=2008-04-08
    +POSTHOOK: Input: default@tab_part
    +POSTHOOK: Input: default@tab_part@ds=2008-04-08
    +#### A masked pattern was here ####
    +1166
      PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table.
      -- In this case the sub-query is chosen as the big table.
      explain
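
For anyone trying to reproduce the plans above outside the q-file harness, a minimal HiveQL sketch follows. It assumes bucketed test tables equivalent to the tab and tab_part setup earlier in bucket_map_join_tez1.q; the set commands use standard Hive properties for running on Tez and enabling map join conversion, and are not introduced by this patch.

    -- minimal sketch, assuming tab and tab_part are bucketed on key
    -- as in the bucket_map_join_tez1.q setup
    set hive.execution.engine=tez;
    set hive.auto.convert.join=true;
    set hive.convert.join.bucket.mapjoin.tez=true;

    explain
    select count(*)
    from
    tab_part d
    join
    (select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key;

With these settings the plan should show CUSTOM_EDGE inputs feeding the map join vertex, as in the Tez plan above, which is the bucket map join shape this fix verifies.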
