FAQ
HIVE-10929: In Tez mode, dynamic partitioning query with union all fails at moveTask, Invalid partition key & values (Vikram Dixit K reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d592303
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d592303
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d592303

Branch: refs/heads/master
Commit: 4d592303ad0f925c7d0815c003cedd45ab4f0064
Parents: d3863be
Author: Vaibhav Gumashta <vgumashta@apache.org>
Authored: Sun Jun 7 11:38:10 2015 -0700
Committer: Vaibhav Gumashta <vgumashta@apache.org>
Committed: Sun Jun 7 11:39:12 2015 -0700

----------------------------------------------------------------------
  .../test/resources/testconfiguration.properties | 1 +
  .../hadoop/hive/ql/parse/GenTezUtils.java | 8 +
  .../tez_union_dynamic_partition.q | 21 +
  .../clientpositive/tez/explainuser_2.q.out | 63 +-
  .../tez/tez_union_dynamic_partition.q.out | 148 +++
  .../results/clientpositive/tez/union4.q.out | 4 +
  .../results/clientpositive/tez/union6.q.out | 4 +
  .../tez/vector_leftsemi_mapjoin.q.out | 1032 +++++++++---------
  .../tez/vector_multi_insert.q.out | 20 +-
  .../clientpositive/tez/vector_outer_join1.q.out | 48 +-
  .../clientpositive/tez/vector_outer_join2.q.out | 22 +-
  .../clientpositive/tez/vector_outer_join3.q.out | 60 +-
  .../clientpositive/tez/vector_outer_join4.q.out | 48 +-
  13 files changed, 855 insertions(+), 624 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 47a1107..784b502 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -336,6 +336,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
    tez_schema_evolution.q,\
    tez_union.q,\
    tez_union2.q,\
+ tez_union_dynamic_partition.q,\
    tez_union_view.q,\
    tez_union_decimal.q,\
    tez_union_group_by.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 0edfc5d..11c1df6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
  import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
  import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
  import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
  import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
  import org.apache.hadoop.hive.ql.exec.TableScanOperator;
  import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -238,6 +239,11 @@ public class GenTezUtils {

      Iterator<Operator<?>> it = newRoots.iterator();
      for (Operator<?> orig: roots) {
+ Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class);
+ for (FileSinkOperator fsOp : fsOpSet) {
+ context.fileSinkSet.remove(fsOp);
+ }
+
        Operator<?> newRoot = it.next();

        replacementMap.put(orig, newRoot);
@@ -301,6 +307,8 @@ public class GenTezUtils {
          linked.add(desc);

          desc.setDirName(new Path(path, ""+linked.size()));
+ desc.setLinkedFileSink(true);
+ desc.setParentDir(path);
          desc.setLinkedFileSinkDesc(linked);
        }


http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
new file mode 100644
index 0000000..1c44a6c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
@@ -0,0 +1,21 @@
+create table dummy(i int);
+insert into table dummy values (1);
+select * from dummy;
+
+create table partunion1(id1 int) partitioned by (part1 string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+select * from partunion1;

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 0340714..222e89e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -3960,11 +3960,11 @@ Map 5 <- Union 2 (CONTAINS)
  Map 7 <- Map 6 (BROADCAST_EDGE)
  Map 8 <- Union 9 (CONTAINS)

-Stage-7
+Stage-15
     Stats-Aggr Operator
- Stage-2
+ Stage-1
           Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
              Stage-4
                 Dependency Collection{}
                    Stage-3
@@ -4344,18 +4344,63 @@ Stage-7
                             Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE
                             table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
                              Please refer to the previous Select Operator [SEL_17]
-Stage-6
+Stage-14
     Stats-Aggr Operator
- Stage-1
+ Stage-0
           Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
               Please refer to the previous Stage-4
-Stage-5
+Stage-13
     Stats-Aggr Operator
- Stage-0
+ Stage-2
           Move Operator
- table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+ table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
               Please refer to the previous Stage-4
+Stage-12
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-19
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-18
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-17
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-16
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-20
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-9
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-22
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-8
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-21
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-7
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-6
+ Stats-Aggr Operator
+ Please refer to the previous Stage-1
+Stage-10
+ Stats-Aggr Operator
+ Please refer to the previous Stage-2
+Stage-11
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
+Stage-5
+ Stats-Aggr Operator
+ Please refer to the previous Stage-0
  PREHOOK: query: explain
  FROM
  (

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
new file mode 100644
index 0000000..68a7531
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: create table dummy(i int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy(i int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert into table dummy values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert into table dummy values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from dummy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dummy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+#### A masked pattern was here ####
+1
+PREHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partunion1
+PREHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dummy
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int), '2014' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dummy
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2 (type: int), '2014' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ part1
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.partunion1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy
+union all
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@partunion1@part1=2014
+POSTHOOK: Lineage: partunion1 PARTITION(part1=2014).id1 EXPRESSION []
+PREHOOK: query: select * from partunion1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partunion1
+PREHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partunion1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partunion1
+POSTHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+1 2014
+2 2014

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out
index 7cd6099..9d079ad 100644
--- a/ql/src/test/results/clientpositive/tez/union4.q.out
+++ b/ql/src/test/results/clientpositive/tez/union4.q.out
@@ -33,6 +33,7 @@ STAGE DEPENDENCIES:
    Stage-2 depends on stages: Stage-1
    Stage-0 depends on stages: Stage-2
    Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -139,6 +140,9 @@ STAGE PLANS:
    Stage: Stage-3
      Stats-Aggr Operator

+ Stage: Stage-4
+ Stats-Aggr Operator
+
  PREHOOK: query: insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                          UNION ALL

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out
index 77f1d0e..4647278 100644
--- a/ql/src/test/results/clientpositive/tez/union6.q.out
+++ b/ql/src/test/results/clientpositive/tez/union6.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
    Stage-2 depends on stages: Stage-1
    Stage-0 depends on stages: Stage-2
    Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -111,6 +112,9 @@ STAGE PLANS:
    Stage: Stage-3
      Stats-Aggr Operator

+ Stage: Stage-4
+ Stats-Aggr Operator
+
  PREHOOK: query: insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                        UNION ALL

Search Discussions

Discussion Posts

Previous

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 5 of 9 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jun 7, '15 at 6:38p
active: Jun 9, '15 at 8:30p
posts: 9
users: 1
website: hive.apache.org

1 user in discussion

Vgumashta: 9 posts

People

Translate

site design / logo © 2021 Grokbase