Grokbase Groups Hive commits May 2016
FAQ
Repository: hive
Updated Branches:
   refs/heads/master 70fe31088 -> 872996629


HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87299662
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87299662
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87299662

Branch: refs/heads/master
Commit: 8729966296a041b7ea952ba67f148d2c48c27749
Parents: 70fe310
Author: Eugene Koifman <ekoifman@hortonworks.com>
Authored: Tue May 3 17:11:47 2016 -0700
Committer: Eugene Koifman <ekoifman@hortonworks.com>
Committed: Tue May 3 17:11:47 2016 -0700

----------------------------------------------------------------------
  .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 1 -
  .../dynpart_sort_optimization_acid.q.out | 120 +++++++++++++++----
  2 files changed, 100 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 06db7f9..2983d38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7030,7 +7030,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
      conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
      conf.set(AcidUtils.CONF_ACID_KEY, "true");
-     conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false);

      if (table.getNumBuckets() < 1) {
        throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName());

http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index eca29df..62399e3 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -380,8 +380,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
    Stage-1 is a root stage
-   Stage-0 depends on stages: Stage-1
-   Stage-2 depends on stages: Stage-0
+   Stage-2 depends on stages: Stage-1
+   Stage-0 depends on stages: Stage-2
+   Stage-3 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -397,12 +398,31 @@ STAGE PLANS:
                  Reduce Output Operator
                    key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                    sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                    value expressions: _col3 (type: string)
        Reduce Operator Tree:
          Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), VALUE._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col2 (type: string)
+ outputColumnNames: _col0, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +++
+ Map-reduce partition columns: _col3 (type: string)
+ value expressions: 'foo' (type: string), 'bar' (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
            File Output Operator
              compressed: false
              table:
@@ -423,7 +443,7 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                name: default.acid

- Stage: Stage-2
+ Stage: Stage-3
      Stats-Aggr Operator

  PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08')
@@ -875,8 +895,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
    Stage-1 is a root stage
-   Stage-0 depends on stages: Stage-1
-   Stage-2 depends on stages: Stage-0
+   Stage-2 depends on stages: Stage-1
+   Stage-0 depends on stages: Stage-2
+   Stage-3 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -892,12 +913,31 @@ STAGE PLANS:
                  Reduce Output Operator
                    key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                    sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                    value expressions: _col4 (type: int)
        Reduce Operator Tree:
          Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col3 (type: int)
+ outputColumnNames: _col0, _col4
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
+ value expressions: 'foo' (type: string), 'bar' (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
            File Output Operator
              compressed: false
              table:
@@ -919,7 +959,7 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                name: default.acid

- Stage: Stage-2
+ Stage: Stage-3
      Stats-Aggr Operator

  PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11
@@ -1053,8 +1093,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
    Stage-1 is a root stage
-   Stage-0 depends on stages: Stage-1
-   Stage-2 depends on stages: Stage-0
+   Stage-2 depends on stages: Stage-1
+   Stage-0 depends on stages: Stage-2
+   Stage-3 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -1070,7 +1111,6 @@ STAGE PLANS:
                  Reduce Output Operator
                    key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                    sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                    value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
        Reduce Operator Tree:
          Select Operator
@@ -1079,6 +1119,26 @@ STAGE PLANS:
            File Output Operator
              compressed: false
              table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
+ File Output Operator
+ compressed: false
+ table:
                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1097,7 +1157,7 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                name: default.acid

- Stage: Stage-2
+ Stage: Stage-3
      Stats-Aggr Operator

  PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11
@@ -1127,8 +1187,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds=
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
    Stage-1 is a root stage
-   Stage-0 depends on stages: Stage-1
-   Stage-2 depends on stages: Stage-0
+   Stage-2 depends on stages: Stage-1
+   Stage-0 depends on stages: Stage-2
+   Stage-3 depends on stages: Stage-0

  STAGE PLANS:
    Stage: Stage-1
@@ -1144,7 +1205,6 @@ STAGE PLANS:
                  Reduce Output Operator
                    key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                    sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                    value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int)
        Reduce Operator Tree:
          Select Operator
@@ -1153,6 +1213,26 @@ STAGE PLANS:
            File Output Operator
              compressed: false
              table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ sort order: ++++
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+ value expressions: _col1 (type: string), _col2 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
+ File Output Operator
+ compressed: false
+ table:
                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                  serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1171,7 +1251,7 @@ STAGE PLANS:
                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                name: default.acid

- Stage: Stage-2
+ Stage: Stage-3
      Stats-Aggr Operator

  PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11

Search Discussions

Discussion Posts

Previous

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 2 of 2 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: May 3, '16 at 11:27p
active: May 4, '16 at 12:12a
posts: 2
users: 1
website: hive.apache.org

1 user in discussion

Ekoifman: 2 posts

People

Translate

site design / logo © 2021 Grokbase