FAQ
Author: namit
Date: Fri Sep 21 10:51:26 2012
New Revision: 1388412

URL: http://svn.apache.org/viewvc?rev=1388412&view=rev
Log:
HIVE-3496 Query plan for multi-join where the third table joined is a subquery containing a map-only
union with hive.auto.convert.join=true is wrong
(Kevin Wilfong via namit)



Added:
hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q
hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1388412&r1=1388411&r2=1388412&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Sep 21 10:51:26 2012
@@ -399,7 +399,8 @@ public final class GenMapRedUtils {
List<Task<? extends Serializable>> parTasks = null;
if (opProcCtx.getRootTasks().contains(currentUnionTask)) {
opProcCtx.getRootTasks().remove(currentUnionTask);
- if (!opProcCtx.getRootTasks().contains(existingTask)) {
+ if (!opProcCtx.getRootTasks().contains(existingTask) &&
+ (existingTask.getParentTasks() == null || existingTask.getParentTasks().isEmpty())) {
opProcCtx.getRootTasks().add(existingTask);
}
}
@@ -423,6 +424,7 @@ public final class GenMapRedUtils {
}
}
}
+
opProcCtx.setCurrTask(existingTask);
}


Added: hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q?rev=1388412&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q Fri Sep 21 10:51:26 2012
@@ -0,0 +1,13 @@
+
+set hive.auto.convert.join=true;
+
+CREATE TABLE src11 as SELECT * FROM src;
+CREATE TABLE src12 as SELECT * FROM src;
+CREATE TABLE src13 as SELECT * FROM src;
+CREATE TABLE src14 as SELECT * FROM src;
+
+
+EXPLAIN SELECT * FROM
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out?rev=1388412&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out Fri Sep 21 10:51:26 2012
@@ -0,0 +1,563 @@
+PREHOOK: query: CREATE TABLE src11 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src11 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src11
+PREHOOK: query: CREATE TABLE src12 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src12 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src12
+PREHOOK: query: CREATE TABLE src13 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src13 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src13
+PREHOOK: query: CREATE TABLE src14 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src14 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src14
+PREHOOK: query: EXPLAIN SELECT * FROM
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src11) a) (TOK_TABREF (TOK_TABNAME src12) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src13))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src14))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-11 is a root stage , consists of Stage-14, Stage-15, Stage-1
+ Stage-14 has a backup stage: Stage-1
+ Stage-9 depends on stages: Stage-14
+ Stage-8 depends on stages: Stage-1, Stage-9, Stage-10 , consists of Stage-12, Stage-13, Stage-2
+ Stage-12 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-12
+ Stage-13 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-13
+ Stage-2
+ Stage-15 has a backup stage: Stage-1
+ Stage-10 depends on stages: Stage-15
+ Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-11
+ Conditional Operator
+
+ Stage: Stage-14
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ c-subquery1:a-subquery1:src13
+ Fetch Operator
+ limit: -1
+ c-subquery2:a-subquery2:src14
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ c-subquery1:a-subquery1:src13
+ TableScan
+ alias: src13
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ Position of Big Table: 0
+ c-subquery2:a-subquery2:src14
+ TableScan
+ alias: src14
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ Position of Big Table: 0
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-13
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ Position of Big Table: 1
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ c-subquery1:a-subquery1:src13
+ TableScan
+ alias: src13
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ c-subquery2:a-subquery2:src14
+ TableScan
+ alias: src14
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col4} {_col5} {_col0} {_col1}
+ 1 {_col0} {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col5]]
+ 1 [Column[_col1]]
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col5
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col5
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ c-subquery1:a-subquery1:src13
+ TableScan
+ alias: src13
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ c-subquery2:a-subquery2:src14
+ TableScan
+ alias: src14
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-15
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-10
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Position of Big Table: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ b
+ TableScan
+ alias: b
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Sep 21, '12 at 11:01a
active: Sep 21, '12 at 11:01a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Namit: 1 post

People

Translate

site design / logo © 2021 Grokbase