Repository: hive
Updated Branches:
   refs/heads/master 219fe99a1 -> 2fd619b7a


HIVE-11895: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix udaf_percentile_approx_23.q (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2fd619b7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2fd619b7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2fd619b7

Branch: refs/heads/master
Commit: 2fd619b7ac55e90f57bc18354c7e61966383c824
Parents: 219fe99
Author: Pengcheng Xiong <pxiong@apache.org>
Authored: Thu Oct 22 14:04:47 2015 -0700
Committer: Pengcheng Xiong <pxiong@apache.org>
Committed: Thu Oct 22 14:06:25 2015 -0700

----------------------------------------------------------------------
 .../ql/optimizer/calcite/HiveCalciteUtil.java |  12 +-
 .../cbo_rp_udaf_percentile_approx_23.q        |  97 +++
 .../cbo_rp_udaf_percentile_approx_23.q.out    | 628 +++++++++++++++++++
 3 files changed, 735 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2fd619b7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 0200506..8e6621a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -45,6 +45,7 @@ import org.apache.calcite.rex.RexLocalRef;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexRangeRef;
+import org.apache.calcite.rex.RexShuttle;
 import org.apache.calcite.rex.RexVisitor;
 import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
@@ -708,11 +709,18 @@ public class HiveCalciteUtil {
       String inputTabAlias) {
     List<ExprNodeDesc> exprNodes = new ArrayList<ExprNodeDesc>();
     List<RexNode> rexInputRefs = getInputRef(inputRefs, inputRel);
+    List<RexNode> exprs = inputRel.getChildExps();
     // TODO: Change ExprNodeConverter to be independent of Partition Expr
     ExprNodeConverter exprConv = new ExprNodeConverter(inputTabAlias, inputRel.getRowType(),
         new HashSet<Integer>(), inputRel.getCluster().getTypeFactory());
-    for (RexNode iRef : rexInputRefs) {
-      exprNodes.add(iRef.accept(exprConv));
+    for (int index = 0; index < rexInputRefs.size(); index++) {
+      if (exprs.get(index) instanceof RexLiteral) {
+        ExprNodeDesc exprNodeDesc = exprConv.visitLiteral((RexLiteral) exprs.get(index));
+        exprNodes.add(exprNodeDesc);
+      } else {
+        RexNode iRef = rexInputRefs.get(index);
+        exprNodes.add(iRef.accept(exprConv));
+      }
     }
     return exprNodes;
   }
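
For context on the hunk above: with the Calcite return path enabled, a constant that appears in a projection (for example the 0.5 fraction or the accuracy argument passed to percentile_approx) surfaces as a RexLiteral among the input operator's child expressions, and converting every projected position as if it were an input reference appears to be what broke udaf_percentile_approx_23.q. The loop now inspects each projected expression and routes literals through ExprNodeConverter.visitLiteral. Below is a minimal, self-contained sketch of that dispatch pattern; Expr, Literal, InputRef, convert and the string "descriptors" are hypothetical stand-ins for illustration, not the real Calcite or Hive classes.

import java.util.ArrayList;
import java.util.List;

public class LiteralAwareProjectionSketch {

  interface Expr {}                                   // stands in for RexNode
  record Literal(Object value) implements Expr {}     // stands in for RexLiteral
  record InputRef(int index) implements Expr {}       // stands in for RexInputRef

  // Stand-in for ExprNodeConverter: produces a string "descriptor" per expression.
  static String convert(Expr e) {
    if (e instanceof Literal) {
      return "const(" + ((Literal) e).value() + ")";  // analogous to exprConv.visitLiteral(...)
    }
    return "col#" + ((InputRef) e).index();           // analogous to iRef.accept(exprConv)
  }

  // Mirrors the fixed loop: dispatch on the kind of each projected expression
  // instead of treating every position as an input reference.
  static List<String> getExprNodes(List<Expr> projectedExprs) {
    List<String> out = new ArrayList<>();
    for (int index = 0; index < projectedExprs.size(); index++) {
      out.add(convert(projectedExprs.get(index)));
    }
    return out;
  }

  public static void main(String[] args) {
    // A projection like "key, 0.5" yields one column reference and one constant.
    System.out.println(getExprNodes(List.<Expr>of(new InputRef(0), new Literal(0.5))));
  }
}

Running the sketch prints [col#0, const(0.5)]; the point is only that the constant is recognized as a literal rather than forced through the input-reference path.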

http://git-wip-us.apache.org/repos/asf/hive/blob/2fd619b7/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q
new file mode 100644
index 0000000..a3cf4a5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_udaf_percentile_approx_23.q
@@ -0,0 +1,97 @@
+set hive.cbo.returnpath.hiveop=true;
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx
+
+CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket;
+load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket;
+load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket;
+load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket;
+
+create table t1 (result double);
+create table t2 (result double);
+create table t3 (result double);
+create table t4 (result double);
+create table t5 (result double);
+create table t6 (result double);
+create table t7 (result array<double>);
+create table t8 (result array<double>);
+create table t9 (result array<double>);
+create table t10 (result array<double>);
+create table t11 (result array<double>);
+create table t12 (result array<double>);
+
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.map.aggr=false;
+-- disable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000);
+
+select * from t1;
+select * from t2;
+select * from t3;
+select * from t4;
+select * from t5;
+select * from t6;
+select * from t7;
+select * from t8;
+select * from t9;
+select * from t10;
+select * from t11;
+select * from t12;
+
+set hive.map.aggr=true;
+-- enable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000);
+
+select * from t1;
+select * from t2;
+select * from t3;
+select * from t4;
+select * from t5;
+select * from t6;
+select * from t7;
+select * from t8;
+select * from t9;
+select * from t10;
+select * from t11;
+select * from t12;
+
+-- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket;
+
+-- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket;
+select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket;

http://git-wip-us.apache.org/repos/asf/hive/blob/2fd619b7/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out
new file mode 100644
index 0000000..02d061a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_udaf_percentile_approx_23.q.out
@@ -0,0 +1,628 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx
+
+CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- 0.23 changed input order of data in reducer task, which affects result of percentile_approx
+
+CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket
+PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket
+POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket
+PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket
+POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket
+PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket
+POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket
+PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@bucket
+POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@bucket
+PREHOOK: query: create table t1 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: create table t3 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: create table t4 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t4
+POSTHOOK: query: create table t4 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t4
+PREHOOK: query: create table t5 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t5
+POSTHOOK: query: create table t5 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t5
+PREHOOK: query: create table t6 (result double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t6
+POSTHOOK: query: create table t6 (result double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t6
+PREHOOK: query: create table t7 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t7
+POSTHOOK: query: create table t7 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t7
+PREHOOK: query: create table t8 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t8
+POSTHOOK: query: create table t8 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t8
+PREHOOK: query: create table t9 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t9
+POSTHOOK: query: create table t9 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t9
+PREHOOK: query: create table t10 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t10
+POSTHOOK: query: create table t10 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t10
+PREHOOK: query: create table t11 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t11
+POSTHOOK: query: create table t11 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t11
+PREHOOK: query: create table t12 (result array<double>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t12
+POSTHOOK: query: create table t12 (result array<double>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t12
+PREHOOK: query: -- disable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t10
+PREHOOK: Output: default@t11
+PREHOOK: Output: default@t12
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
+PREHOOK: Output: default@t4
+PREHOOK: Output: default@t5
+PREHOOK: Output: default@t6
+PREHOOK: Output: default@t7
+PREHOOK: Output: default@t8
+PREHOOK: Output: default@t9
+POSTHOOK: query: -- disable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t10
+POSTHOOK: Output: default@t11
+POSTHOOK: Output: default@t12
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t3
+POSTHOOK: Output: default@t4
+POSTHOOK: Output: default@t5
+POSTHOOK: Output: default@t6
+POSTHOOK: Output: default@t7
+POSTHOOK: Output: default@t8
+POSTHOOK: Output: default@t9
+POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+PREHOOK: query: select * from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+254.08333333333334
+PREHOOK: query: select * from t3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t4
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t5
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t5
+#### A masked pattern was here ####
+254.08333333333334
+PREHOOK: query: select * from t6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t6
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t6
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t7
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t7
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t8
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t8
+#### A masked pattern was here ####
+[23.355555555555558,254.08333333333334,477.0625,488.38271604938274]
+PREHOOK: query: select * from t9
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t9
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t9
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t9
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t10
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t10
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t11
+#### A masked pattern was here ####
+[23.355555555555558,254.08333333333334,477.0625,488.38271604938274]
+PREHOOK: query: select * from t12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t12
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: -- enable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t10
+PREHOOK: Output: default@t11
+PREHOOK: Output: default@t12
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
+PREHOOK: Output: default@t4
+PREHOOK: Output: default@t5
+PREHOOK: Output: default@t6
+PREHOOK: Output: default@t7
+PREHOOK: Output: default@t8
+PREHOOK: Output: default@t9
+POSTHOOK: query: -- enable map-side aggregation
+FROM bucket
+insert overwrite table t1 SELECT percentile_approx(cast(key AS double), 0.5)
+insert overwrite table t2 SELECT percentile_approx(cast(key AS double), 0.5, 100)
+insert overwrite table t3 SELECT percentile_approx(cast(key AS double), 0.5, 1000)
+
+insert overwrite table t4 SELECT percentile_approx(cast(key AS int), 0.5)
+insert overwrite table t5 SELECT percentile_approx(cast(key AS int), 0.5, 100)
+insert overwrite table t6 SELECT percentile_approx(cast(key AS int), 0.5, 1000)
+
+insert overwrite table t7 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98))
+insert overwrite table t8 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t9 SELECT percentile_approx(cast(key AS double), array(0.05,0.5,0.95,0.98), 1000)
+
+insert overwrite table t10 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98))
+insert overwrite table t11 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 100)
+insert overwrite table t12 SELECT percentile_approx(cast(key AS int), array(0.05,0.5,0.95,0.98), 1000)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t10
+POSTHOOK: Output: default@t11
+POSTHOOK: Output: default@t12
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t3
+POSTHOOK: Output: default@t4
+POSTHOOK: Output: default@t5
+POSTHOOK: Output: default@t6
+POSTHOOK: Output: default@t7
+POSTHOOK: Output: default@t8
+POSTHOOK: Output: default@t9
+POSTHOOK: Lineage: t1.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t10.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t11.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t12.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t2.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t3.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t4.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t5.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t6.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t7.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t8.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+POSTHOOK: Lineage: t9.result EXPRESSION [(bucket)bucket.FieldSchema(name:key, type:double, comment:null), ]
+PREHOOK: query: select * from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+254.08333333333334
+PREHOOK: query: select * from t3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t4
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t5
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t5
+#### A masked pattern was here ####
+254.08333333333334
+PREHOOK: query: select * from t6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t6
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t6
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: select * from t7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t7
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t7
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t8
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t8
+#### A masked pattern was here ####
+[23.355555555555558,254.08333333333334,477.0625,488.38271604938274]
+PREHOOK: query: select * from t9
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t9
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t9
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t9
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t10
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t10
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: select * from t11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t11
+#### A masked pattern was here ####
+[23.355555555555558,254.08333333333334,477.0625,488.38271604938274]
+PREHOOK: query: select * from t12
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t12
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t12
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: -- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: bucket
+            Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: CASE WHEN ((key < 100.0)) THEN (NaN) ELSE (key) END (type: double)
+              outputColumnNames: $f0
+              Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: percentile_approx($f0, 0.5)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: array<double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: percentile_approx(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: $f0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) between 340.5 and 343.0 from bucket
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+#### A masked pattern was here ####
+true
+PREHOOK: query: -- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with CBO
+explain
+select percentile_approx(key, 0.5) from bucket
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: bucket
+            Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: double)
+              outputColumnNames: $f0
+              Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: percentile_approx($f0, 0.5)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: array<double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: percentile_approx(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: $f0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_approx(key, 0.5) between 255.0 and 257.0 from bucket
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+#### A masked pattern was here ####
+true
