HIVE-10929: In Tez mode, dynamic partitioning query with union all fails at moveTask, Invalid partition key & values (Vikram Dixit K reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbf9b0eb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbf9b0eb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbf9b0eb
Branch: refs/heads/branch-1
Commit: bbf9b0ebdffda03b038b32a34aea737440676b75
Parents: a854def
Author: Vaibhav Gumashta <vgumashta@apache.org>
Authored: Sun Jun 7 11:38:10 2015 -0700
Committer: Vaibhav Gumashta <vgumashta@apache.org>
Committed: Tue Jun 9 13:30:00 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/GenTezUtils.java | 8 +
.../tez_union_dynamic_partition.q | 21 +
.../clientpositive/tez/explainuser_2.q.out | 63 +-
.../tez/tez_union_dynamic_partition.q.out | 148 +++
.../results/clientpositive/tez/union4.q.out | 4 +
.../results/clientpositive/tez/union6.q.out | 4 +
.../tez/vector_leftsemi_mapjoin.q.out | 1032 +++++++++---------
.../tez/vector_multi_insert.q.out | 20 +-
.../clientpositive/tez/vector_outer_join1.q.out | 48 +-
.../clientpositive/tez/vector_outer_join2.q.out | 22 +-
.../clientpositive/tez/vector_outer_join3.q.out | 60 +-
.../clientpositive/tez/vector_outer_join4.q.out | 48 +-
13 files changed, 855 insertions(+), 624 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 521a189..19e5397 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -335,6 +335,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
tez_schema_evolution.q,\
tez_union.q,\
tez_union2.q,\
+ tez_union_dynamic_partition.q,\
tez_union_view.q,\
tez_union_decimal.q,\
tez_union_group_by.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 0edfc5d..11c1df6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -238,6 +239,11 @@ public class GenTezUtils {
Iterator<Operator<?>> it = newRoots.iterator();
for (Operator<?> orig: roots) {
+ Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class);
+ for (FileSinkOperator fsOp : fsOpSet) {
+ context.fileSinkSet.remove(fsOp);
+ }
+
Operator<?> newRoot = it.next();
replacementMap.put(orig, newRoot);
@@ -301,6 +307,8 @@ public class GenTezUtils {
linked.add(desc);
desc.setDirName(new Path(path, ""+linked.size()));
+ desc.setLinkedFileSink(true);
+ desc.setParentDir(path);
desc.setLinkedFileSinkDesc(linked);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
new file mode 100644
index 0000000..1c44a6c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
@@ -0,0 +1,21 @@
+create table dummy(i int);
+insert into table dummy values (1);
+select * from dummy;
+
+create table partunion1(id1 int) partitioned by (part1 string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+select * from partunion1;
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 0340714..222e89e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -3960,11 +3960,11 @@ Map 5 <- Union 2 (CONTAINS)
Map 7 <- Map 6 (BROADCAST_EDGE)
Map 8 <- Union 9 (CONTAINS)
-Stage-7
+Stage-15
Stats-Aggr Operator
- Stage-2
+ Stage-1
Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Stage-4
Dependency Collection{}
Stage-3
@@ -4344,18 +4344,63 @@ Stage-7
Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Please refer to the previous Select Operator [SEL_17]
-Stage-6
+Stage-14
Stats-Aggr Operator
- Stage-1
+ Stage-0
Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Please refer to the previous Stage-4
-Stage-5
+Stage-13
Stats-Aggr Operator
- Stage-0
+ Stage-2
Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
Please refer to the previous Stage-4
+Stage-12
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-19
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-18
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-17
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-16
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-20
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-9
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-22
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-8
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-21
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-7
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-6
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-10
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-11
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-5
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
PREHOOK: query: explain
FROM
(
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
new file mode 100644
index 0000000..68a7531
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: create table dummy(i int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy(i int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert into table dummy values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert into table dummy values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from dummy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dummy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+#### A masked pattern was here ####
+1
+PREHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partunion1
+PREHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dummy
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int), '2014' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dummy
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2 (type: int), '2014' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part1
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@partunion1@part1=2014
+POSTHOOK: Lineage: partunion1 PARTITION(part1=2014).id1 EXPRESSION []
+PREHOOK: query: select * from partunion1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partunion1
+PREHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partunion1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partunion1
+POSTHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+1 2014
+2 2014
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/test/results/clientpositive/tez/union4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out
index 7cd6099..9d079ad 100644
--- a/ql/src/test/results/clientpositive/tez/union4.q.out
+++ b/ql/src/test/results/clientpositive/tez/union4.q.out
@@ -33,6 +33,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -139,6 +140,9 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Stats-Aggr Operator
+
PREHOOK: query: insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
UNION ALL
http://git-wip-us.apache.org/repos/asf/hive/blob/bbf9b0eb/ql/src/test/results/clientpositive/tez/union6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out
index 77f1d0e..4647278 100644
--- a/ql/src/test/results/clientpositive/tez/union6.q.out
+++ b/ql/src/test/results/clientpositive/tez/union6.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
@@ -111,6 +112,9 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Stats-Aggr Operator
+
PREHOOK: query: insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
UNION ALL