Repository: hive
Updated Branches:
   refs/heads/master 0f1c112fc -> cac5804de


HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cac5804d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cac5804d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cac5804d

Branch: refs/heads/master
Commit: cac5804de034ad54821e0524091cff0f4a97476b
Parents: 0f1c112
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Thu Dec 17 13:38:57 2015 -0600
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Thu Dec 17 13:38:57 2015 -0600

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 ++--
  .../test/queries/clientpositive/decimal_stats.q | 16 +++
  .../results/clientpositive/decimal_stats.q.out | 106 +++++++++++++++++++
  3 files changed, 135 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 149cbc1..2f78fe8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -709,13 +709,19 @@ public class StatsUtils {
        cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
        cs.setCountDistint(csd.getDecimalStats().getNumDVs());
        cs.setNumNulls(csd.getDecimalStats().getNumNulls());
-      Decimal val = csd.getDecimalStats().getHighValue();
-      BigDecimal maxVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      val = csd.getDecimalStats().getLowValue();
-      BigDecimal minVal = HiveDecimal.
-          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
-      cs.setRange(minVal, maxVal);
+      Decimal highValue = csd.getDecimalStats().getHighValue();
+      Decimal lowValue = csd.getDecimalStats().getLowValue();
+      if (highValue != null && highValue.getUnscaled() != null
+          && lowValue != null && lowValue.getUnscaled() != null) {
+        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
+        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
+        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
+        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
+
+        if (minVal != null && maxVal != null) {
+          cs.setRange(minVal, maxVal);
+        }
+      }
      } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
      } else {
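
The hunk above is the whole fix: when every value in a decimal column is NULL, the metastore returns decimal statistics whose high/low endpoints (or their unscaled bytes) are null, and the old code converted them unconditionally, which is where the NPE came from. The patch calls cs.setRange() only when both endpoints are present, and it re-checks the converted HiveDecimal values for null before setting the range. For readers without a Hive checkout, here is a minimal, self-contained sketch of the same guard pattern; DecimalRangeGuardSketch, FakeDecimal, FakeColStats and setDecimalRange are hypothetical stand-ins (plain BigDecimal replaces HiveDecimal), not Hive classes.

----------------------------------------------------------------------
import java.math.BigDecimal;
import java.math.BigInteger;

// Sketch of the null-guard pattern applied by HIVE-12684, using simplified
// stand-ins for the metastore Decimal type and the planner-side statistics
// holder. Not Hive code; just the shape of the fix.
public class DecimalRangeGuardSketch {

  // Stand-in for the Thrift Decimal: unscaled bytes may be absent when the
  // column never contained a non-NULL value.
  static final class FakeDecimal {
    private final byte[] unscaled;
    private final short scale;

    FakeDecimal(byte[] unscaled, short scale) {
      this.unscaled = unscaled;
      this.scale = scale;
    }

    byte[] getUnscaled() { return unscaled; }
    short getScale() { return scale; }
  }

  // Stand-in for the column statistics object that receives the range.
  static final class FakeColStats {
    BigDecimal min;
    BigDecimal max;

    void setRange(BigDecimal minVal, BigDecimal maxVal) {
      this.min = minVal;
      this.max = maxVal;
    }
  }

  // Before the fix the conversion ran unconditionally, so an all-NULL column
  // (no high/low endpoints) triggered a NullPointerException here. With the
  // guard, the range is simply left unset and planning continues on the
  // remaining statistics (row count, null count, NDV).
  static void setDecimalRange(FakeColStats cs, FakeDecimal high, FakeDecimal low) {
    if (high != null && high.getUnscaled() != null
        && low != null && low.getUnscaled() != null) {
      BigDecimal maxVal = new BigDecimal(new BigInteger(high.getUnscaled()), high.getScale());
      BigDecimal minVal = new BigDecimal(new BigInteger(low.getUnscaled()), low.getScale());
      cs.setRange(minVal, maxVal);
    }
  }

  public static void main(String[] args) {
    // Column where every value is NULL: no endpoints, no NPE, range stays unset.
    FakeColStats allNulls = new FakeColStats();
    setDecimalRange(allNulls, null, null);
    System.out.println("all-NULL column range: " + allNulls.min + " .. " + allNulls.max);

    // Column with real values: the range is populated exactly as before.
    FakeColStats populated = new FakeColStats();
    FakeDecimal high = new FakeDecimal(BigInteger.valueOf(1729).toByteArray(), (short) 2);
    FakeDecimal low = new FakeDecimal(BigInteger.valueOf(314).toByteArray(), (short) 2);
    setDecimalRange(populated, high, low);
    System.out.println("populated column range: " + populated.min + " .. " + populated.max);
  }
}
----------------------------------------------------------------------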

http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/queries/clientpositive/decimal_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
new file mode 100644
index 0000000..2370e7d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/decimal_stats.q
@@ -0,0 +1,16 @@
+set hive.stats.fetch.column.stats=true;
+drop table if exists decimal_1;
+
+create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
+
+desc decimal_1;
+
+insert overwrite table decimal_1
+ select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
+
+analyze table decimal_1 compute statistics for columns;
+
+desc formatted decimal_1 v;
+
+explain select * from decimal_1 order by 1 limit 100;
+drop table decimal_1;

http://git-wip-us.apache.org/repos/asf/hive/blob/cac5804d/ql/src/test/results/clientpositive/decimal_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
new file mode 100644
index 0000000..dabf7f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
@@ -0,0 +1,106 @@
+PREHOOK: query: drop table if exists decimal_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists decimal_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_1
+PREHOOK: query: desc decimal_1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc decimal_1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+t decimal(4,2)
+u decimal(5,0)
+v decimal(10,0)
+PREHOOK: query: insert overwrite table decimal_1
+ select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: insert overwrite table decimal_1
+ select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@decimal_1
+POSTHOOK: Lineage: decimal_1.t EXPRESSION []
+POSTHOOK: Lineage: decimal_1.u EXPRESSION []
+POSTHOOK: Lineage: decimal_1.v EXPRESSION []
+PREHOOK: query: analyze table decimal_1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table decimal_1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_1
+#### A masked pattern was here ####
+PREHOOK: query: desc formatted decimal_1 v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@decimal_1
+POSTHOOK: query: desc formatted decimal_1 v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@decimal_1
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+v decimal(10,0) 500 1 from deserializer
+PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: decimal_1
+            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: 1 (type: int)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
+          Limit
+            Number of rows: 100
+            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table decimal_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_1
+PREHOOK: Output: default@decimal_1
+POSTHOOK: query: drop table decimal_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_1
+POSTHOOK: Output: default@decimal_1


  • Prasanthj at Dec 17, 2015 at 7:41 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-2.0 6fed7783d -> 7ca1c4d58


    HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7ca1c4d5
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7ca1c4d5
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7ca1c4d5

    Branch: refs/heads/branch-2.0
    Commit: 7ca1c4d5849da3d83a84ab51c189f5912948f338
    Parents: 6fed778
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 17 13:38:57 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 17 13:41:15 2015 -0600

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 ++--
      .../test/queries/clientpositive/decimal_stats.q | 16 +++
      .../results/clientpositive/decimal_stats.q.out | 106 +++++++++++++++++++
      3 files changed, 135 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    index 149cbc1..2f78fe8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    @@ -709,13 +709,19 @@ public class StatsUtils {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
            cs.setCountDistint(csd.getDecimalStats().getNumDVs());
            cs.setNumNulls(csd.getDecimalStats().getNumNulls());
    -      Decimal val = csd.getDecimalStats().getHighValue();
    -      BigDecimal maxVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      val = csd.getDecimalStats().getLowValue();
    -      BigDecimal minVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      cs.setRange(minVal, maxVal);
    +      Decimal highValue = csd.getDecimalStats().getHighValue();
    +      Decimal lowValue = csd.getDecimalStats().getLowValue();
    +      if (highValue != null && highValue.getUnscaled() != null
    +          && lowValue != null && lowValue.getUnscaled() != null) {
    +        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
    +        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
    +        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
    +        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
    +
    +        if (minVal != null && maxVal != null) {
    +          cs.setRange(minVal, maxVal);
    +        }
    +      }
          } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
          } else {

    http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/test/queries/clientpositive/decimal_stats.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
    new file mode 100644
    index 0000000..2370e7d
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/decimal_stats.q
    @@ -0,0 +1,16 @@
    +set hive.stats.fetch.column.stats=true;
    +drop table if exists decimal_1;
    +
    +create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
    +
    +desc decimal_1;
    +
    +insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
    +
    +analyze table decimal_1 compute statistics for columns;
    +
    +desc formatted decimal_1 v;
    +
    +explain select * from decimal_1 order by 1 limit 100;
    +drop table decimal_1;

    http://git-wip-us.apache.org/repos/asf/hive/blob/7ca1c4d5/ql/src/test/results/clientpositive/decimal_stats.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
    new file mode 100644
    index 0000000..dabf7f8
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
    @@ -0,0 +1,106 @@
    +PREHOOK: query: drop table if exists decimal_1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table if exists decimal_1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@decimal_1
    +PREHOOK: query: desc decimal_1
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc decimal_1
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +t decimal(4,2)
    +u decimal(5,0)
    +v decimal(10,0)
    +PREHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@decimal_1
    +POSTHOOK: Lineage: decimal_1.t EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.u EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.v EXPRESSION []
    +PREHOOK: query: analyze table decimal_1 compute statistics for columns
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +POSTHOOK: query: analyze table decimal_1 compute statistics for columns
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +PREHOOK: query: desc formatted decimal_1 v
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc formatted decimal_1 v
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
    +
    +v decimal(10,0) 500 1 from deserializer
    +PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    +  Stage-1 is a root stage
    +  Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    +  Stage: Stage-1
    +    Map Reduce
    +      Map Operator Tree:
    +          TableScan
    +            alias: decimal_1
    +            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +            Select Operator
    +              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
    +              outputColumnNames: _col0, _col1, _col2
    +              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +              Reduce Output Operator
    +                key expressions: 1 (type: int)
    +                sort order: +
    +                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +                TopN Hash Memory Usage: 0.1
    +                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
    +      Reduce Operator Tree:
    +        Select Operator
    +          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
    +          outputColumnNames: _col0, _col1, _col2
    +          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +          Limit
    +            Number of rows: 100
    +            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +            File Output Operator
    +              compressed: false
    +              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +              table:
    +                  input format: org.apache.hadoop.mapred.TextInputFormat
    +                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    +  Stage: Stage-0
    +    Fetch Operator
    +      limit: 100
    +      Processor Tree:
    +        ListSink
    +
    +PREHOOK: query: drop table decimal_1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@decimal_1
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: drop table decimal_1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@decimal_1
    +POSTHOOK: Output: default@decimal_1
  • Prasanthj at Dec 17, 2015 at 8:05 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1.0 57788decd -> b56623b18


    HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b56623b1
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b56623b1
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b56623b1

    Branch: refs/heads/branch-1.0
    Commit: b56623b183e8538384f8bd4533e2d3a8e46b31d3
    Parents: 57788de
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 17 13:38:57 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 17 14:04:52 2015 -0600

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 ++--
      .../test/queries/clientpositive/decimal_stats.q | 16 +++
      .../results/clientpositive/decimal_stats.q.out | 106 +++++++++++++++++++
      3 files changed, 135 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b56623b1/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    index 26cf56d..f7e004a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    @@ -590,13 +590,19 @@ public class StatsUtils {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
            cs.setCountDistint(csd.getDecimalStats().getNumDVs());
            cs.setNumNulls(csd.getDecimalStats().getNumNulls());
    -      Decimal val = csd.getDecimalStats().getHighValue();
    -      BigDecimal maxVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      val = csd.getDecimalStats().getLowValue();
    -      BigDecimal minVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      cs.setRange(minVal, maxVal);
    +      Decimal highValue = csd.getDecimalStats().getHighValue();
    +      Decimal lowValue = csd.getDecimalStats().getLowValue();
    +      if (highValue != null && highValue.getUnscaled() != null
    +          && lowValue != null && lowValue.getUnscaled() != null) {
    +        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
    +        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
    +        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
    +        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
    +
    +        if (minVal != null && maxVal != null) {
    +          cs.setRange(minVal, maxVal);
    +        }
    +      }
          } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
          } else {

    http://git-wip-us.apache.org/repos/asf/hive/blob/b56623b1/ql/src/test/queries/clientpositive/decimal_stats.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
    new file mode 100644
    index 0000000..2370e7d
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/decimal_stats.q
    @@ -0,0 +1,16 @@
    +set hive.stats.fetch.column.stats=true;
    +drop table if exists decimal_1;
    +
    +create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
    +
    +desc decimal_1;
    +
    +insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
    +
    +analyze table decimal_1 compute statistics for columns;
    +
    +desc formatted decimal_1 v;
    +
    +explain select * from decimal_1 order by 1 limit 100;
    +drop table decimal_1;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b56623b1/ql/src/test/results/clientpositive/decimal_stats.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
    new file mode 100644
    index 0000000..dabf7f8
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
    @@ -0,0 +1,106 @@
    +PREHOOK: query: drop table if exists decimal_1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table if exists decimal_1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@decimal_1
    +PREHOOK: query: desc decimal_1
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc decimal_1
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +t decimal(4,2)
    +u decimal(5,0)
    +v decimal(10,0)
    +PREHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@decimal_1
    +POSTHOOK: Lineage: decimal_1.t EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.u EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.v EXPRESSION []
    +PREHOOK: query: analyze table decimal_1 compute statistics for columns
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +POSTHOOK: query: analyze table decimal_1 compute statistics for columns
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +PREHOOK: query: desc formatted decimal_1 v
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc formatted decimal_1 v
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
    +
    +v decimal(10,0) 500 1 from deserializer
    +PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    +  Stage-1 is a root stage
    +  Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    +  Stage: Stage-1
    +    Map Reduce
    +      Map Operator Tree:
    +          TableScan
    +            alias: decimal_1
    +            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +            Select Operator
    +              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
    +              outputColumnNames: _col0, _col1, _col2
    +              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +              Reduce Output Operator
    +                key expressions: 1 (type: int)
    +                sort order: +
    +                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +                TopN Hash Memory Usage: 0.1
    +                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
    +      Reduce Operator Tree:
    +        Select Operator
    +          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
    +          outputColumnNames: _col0, _col1, _col2
    +          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +          Limit
    +            Number of rows: 100
    +            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +            File Output Operator
    +              compressed: false
    +              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +              table:
    +                  input format: org.apache.hadoop.mapred.TextInputFormat
    +                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    +  Stage: Stage-0
    +    Fetch Operator
    +      limit: 100
    +      Processor Tree:
    +        ListSink
    +
    +PREHOOK: query: drop table decimal_1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@decimal_1
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: drop table decimal_1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@decimal_1
    +POSTHOOK: Output: default@decimal_1
  • Prasanthj at Dec 24, 2015 at 9:23 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 11b0415e4 -> 1a159bc1b


    HIVE-12684: NPE in stats annotation when all values in decimal column are NULLs (Prasanth Jayachandran reviewed by Pengcheng Xiong)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a159bc1
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a159bc1
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a159bc1

    Branch: refs/heads/branch-1
    Commit: 1a159bc1b3bc4490bba9dc149e6de7feb9277710
    Parents: 11b0415
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 17 13:38:57 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 24 15:23:04 2015 -0600

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 ++--
      .../test/queries/clientpositive/decimal_stats.q | 16 +++
      .../results/clientpositive/decimal_stats.q.out | 106 +++++++++++++++++++
      3 files changed, 135 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/1a159bc1/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    index f67ddf1..1e4c85e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    @@ -692,13 +692,19 @@ public class StatsUtils {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
            cs.setCountDistint(csd.getDecimalStats().getNumDVs());
            cs.setNumNulls(csd.getDecimalStats().getNumNulls());
    -      Decimal val = csd.getDecimalStats().getHighValue();
    -      BigDecimal maxVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      val = csd.getDecimalStats().getLowValue();
    -      BigDecimal minVal = HiveDecimal.
    -          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
    -      cs.setRange(minVal, maxVal);
    +      Decimal highValue = csd.getDecimalStats().getHighValue();
    +      Decimal lowValue = csd.getDecimalStats().getLowValue();
    +      if (highValue != null && highValue.getUnscaled() != null
    +          && lowValue != null && lowValue.getUnscaled() != null) {
    +        HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
    +        BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
    +        HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
    +        BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
    +
    +        if (minVal != null && maxVal != null) {
    +          cs.setRange(minVal, maxVal);
    +        }
    +      }
          } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
          } else {

    http://git-wip-us.apache.org/repos/asf/hive/blob/1a159bc1/ql/src/test/queries/clientpositive/decimal_stats.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q
    new file mode 100644
    index 0000000..2370e7d
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/decimal_stats.q
    @@ -0,0 +1,16 @@
    +set hive.stats.fetch.column.stats=true;
    +drop table if exists decimal_1;
    +
    +create table decimal_1 (t decimal(4,2), u decimal(5), v decimal);
    +
    +desc decimal_1;
    +
    +insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src;
    +
    +analyze table decimal_1 compute statistics for columns;
    +
    +desc formatted decimal_1 v;
    +
    +explain select * from decimal_1 order by 1 limit 100;
    +drop table decimal_1;

    http://git-wip-us.apache.org/repos/asf/hive/blob/1a159bc1/ql/src/test/results/clientpositive/decimal_stats.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out
    new file mode 100644
    index 0000000..dabf7f8
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out
    @@ -0,0 +1,106 @@
    +PREHOOK: query: drop table if exists decimal_1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table if exists decimal_1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: create table decimal_1 (t decimal(4,2), u decimal(5), v decimal)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@decimal_1
    +PREHOOK: query: desc decimal_1
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc decimal_1
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +t decimal(4,2)
    +u decimal(5,0)
    +v decimal(10,0)
    +PREHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: insert overwrite table decimal_1
    + select cast('17.29' as decimal(4,2)), 3.1415926BD, null from src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@decimal_1
    +POSTHOOK: Lineage: decimal_1.t EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.u EXPRESSION []
    +POSTHOOK: Lineage: decimal_1.v EXPRESSION []
    +PREHOOK: query: analyze table decimal_1 compute statistics for columns
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +POSTHOOK: query: analyze table decimal_1 compute statistics for columns
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@decimal_1
    +#### A masked pattern was here ####
    +PREHOOK: query: desc formatted decimal_1 v
    +PREHOOK: type: DESCTABLE
    +PREHOOK: Input: default@decimal_1
    +POSTHOOK: query: desc formatted decimal_1 v
    +POSTHOOK: type: DESCTABLE
    +POSTHOOK: Input: default@decimal_1
    +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
    +
    +v decimal(10,0) 500 1 from deserializer
    +PREHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    +  Stage-1 is a root stage
    +  Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    +  Stage: Stage-1
    +    Map Reduce
    +      Map Operator Tree:
    +          TableScan
    +            alias: decimal_1
    +            Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +            Select Operator
    +              expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0))
    +              outputColumnNames: _col0, _col1, _col2
    +              Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +              Reduce Output Operator
    +                key expressions: 1 (type: int)
    +                sort order: +
    +                Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +                TopN Hash Memory Usage: 0.1
    +                value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0))
    +      Reduce Operator Tree:
    +        Select Operator
    +          expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0))
    +          outputColumnNames: _col0, _col1, _col2
    +          Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE
    +          Limit
    +            Number of rows: 100
    +            Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +            File Output Operator
    +              compressed: false
    +              Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE
    +              table:
    +                  input format: org.apache.hadoop.mapred.TextInputFormat
    +                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    +  Stage: Stage-0
    +    Fetch Operator
    +      limit: 100
    +      Processor Tree:
    +        ListSink
    +
    +PREHOOK: query: drop table decimal_1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@decimal_1
    +PREHOOK: Output: default@decimal_1
    +POSTHOOK: query: drop table decimal_1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@decimal_1
    +POSTHOOK: Output: default@decimal_1
