FAQ
Repository: hive
Updated Branches:
   refs/heads/master 4cd1101b8 -> f87b2b637


HIVE-13233: Use min and max values to estimate better stats for comparison operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f87b2b63
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f87b2b63
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f87b2b63

Branch: refs/heads/master
Commit: f87b2b6372fdf0b8e386a8364b49429e2ce7bad2
Parents: 4cd1101
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Mar 10 19:14:58 2016 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Tue Mar 15 10:42:10 2016 +0100

----------------------------------------------------------------------
  .../stats/annotation/StatsRulesProcFactory.java | 184 ++++++++++++++++++-
  .../apache/hadoop/hive/ql/stats/StatsUtils.java | 2 +
  .../clientpositive/annotate_stats_filter.q | 12 +-
  .../clientpositive/annotate_stats_filter.q.out | 176 ++++++++++++++++--
  .../annotate_stats_join_pkfk.q.out | 62 +++----
  .../clientpositive/annotate_stats_part.q.out | 12 +-
  .../clientpositive/tez/explainuser_1.q.out | 32 ++--
  7 files changed, 408 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index f273d25..4bcf6bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -26,8 +26,6 @@ import java.util.Map;
  import java.util.Map.Entry;
  import java.util.Stack;

-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.ql.ErrorMsg;
  import org.apache.hadoop.hive.ql.exec.ColumnInfo;
@@ -80,6 +78,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
  import org.apache.hadoop.hive.serde.serdeConstants;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

  import com.google.common.collect.Lists;
  import com.google.common.collect.Maps;
@@ -500,6 +500,181 @@ public class StatsRulesProcFactory {
        return maxNoNulls;
      }

+ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc) {
+ long numRows = stats.getNumRows();
+ GenericUDF udf = genFunc.getGenericUDF();
+
+ ExprNodeColumnDesc columnDesc;
+ ExprNodeConstantDesc constantDesc;
+ boolean upperBound;
+ String boundValue = null;
+ if (genFunc.getChildren().get(0) instanceof ExprNodeColumnDesc &&
+ genFunc.getChildren().get(1) instanceof ExprNodeConstantDesc) {
+ columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(0);
+ constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(1);
+ // Comparison to null will always return false
+ if (constantDesc.getValue() == null) {
+ return 0;
+ }
+ if (udf instanceof GenericUDFOPEqualOrGreaterThan ||
+ udf instanceof GenericUDFOPGreaterThan) {
+ boundValue = constantDesc.getValue().toString();
+ upperBound = false;
+ } else {
+ boundValue = constantDesc.getValue().toString();
+ upperBound = true;
+ }
+ } else if (genFunc.getChildren().get(1) instanceof ExprNodeColumnDesc &&
+ genFunc.getChildren().get(0) instanceof ExprNodeConstantDesc) {
+ columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(1);
+ constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(0);
+ // Comparison to null will always return false
+ if (constantDesc.getValue() == null) {
+ return 0;
+ }
+ if (udf instanceof GenericUDFOPEqualOrGreaterThan ||
+ udf instanceof GenericUDFOPGreaterThan) {
+ boundValue = constantDesc.getValue().toString();
+ upperBound = true;
+ } else {
+ boundValue = constantDesc.getValue().toString();
+ upperBound = false;
+ }
+ } else {
+ // default
+ return numRows / 3;
+ }
+
+ ColStatistics cs = stats.getColumnStatisticsFromColName(columnDesc.getColumn());
+ if (cs != null && cs.getRange() != null &&
+ cs.getRange().maxValue != null && cs.getRange().minValue != null) {
+ String colTypeLowerCase = columnDesc.getTypeString().toLowerCase();
+ try {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) {
+ byte value = new Byte(boundValue);
+ byte maxValue = cs.getRange().maxValue.byteValue();
+ byte minValue = cs.getRange().minValue.byteValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
+ short value = new Short(boundValue);
+ short maxValue = cs.getRange().maxValue.shortValue();
+ short minValue = cs.getRange().minValue.shortValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
+ colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
+ // Date is an integer internally
+ int value = new Integer(boundValue);
+ int maxValue = cs.getRange().maxValue.intValue();
+ int minValue = cs.getRange().minValue.intValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
+ long value = new Long(boundValue);
+ long maxValue = cs.getRange().maxValue.longValue();
+ long minValue = cs.getRange().minValue.longValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
+ float value = new Float(boundValue);
+ float maxValue = cs.getRange().maxValue.floatValue();
+ float minValue = cs.getRange().minValue.floatValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
+ double value = new Double(boundValue);
+ double maxValue = cs.getRange().maxValue.doubleValue();
+ double minValue = cs.getRange().minValue.doubleValue();
+ if (upperBound) {
+ if (maxValue < value) {
+ return numRows;
+ }
+ if (minValue > value) {
+ return 0;
+ }
+ } else {
+ if (minValue > value) {
+ return numRows;
+ }
+ if (maxValue < value) {
+ return 0;
+ }
+ }
+ }
+ } catch (NumberFormatException nfe) {
+ return numRows / 3;
+ }
+ }
+ // default
+ return numRows / 3;
+ }
+
      private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
          AnnotateStatsProcCtx aspCtx, List<String> neededCols,
          FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
@@ -578,9 +753,10 @@ public class StatsRulesProcFactory {
          } else if (udf instanceof GenericUDFOPNotEqual) {
            return numRows;
          } else if (udf instanceof GenericUDFOPEqualOrGreaterThan
- || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan
+ || udf instanceof GenericUDFOPEqualOrLessThan
+ || udf instanceof GenericUDFOPGreaterThan
              || udf instanceof GenericUDFOPLessThan) {
- return numRows / 3;
+ return evaluateComparator(stats, genFunc);
          } else if (udf instanceof GenericUDFOPNotNull) {
              return evaluateNotNullExpr(stats, genFunc);
          } else if (udf instanceof GenericUDFOPNull) {

http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9d139ba..d8acf94 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -724,6 +724,8 @@ public class StatsUtils {
        }
      } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
+ cs.setRange(csd.getDateStats().getLowValue().getDaysSinceEpoch(),
+ csd.getDateStats().getHighValue().getDaysSinceEpoch());
      } else {
        // Columns statistics for complex datatypes are not supported yet
        return null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/test/queries/clientpositive/annotate_stats_filter.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_filter.q b/ql/src/test/queries/clientpositive/annotate_stats_filter.q
index 436c053..a352a77 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_filter.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_filter.q
@@ -83,9 +83,17 @@ explain select * from loc_orc where (year=2001 and year is null) or (state='CA')
  -- numRows: 1 rawDataSize: 102
  explain select * from loc_orc where (year=2001 or year is null) and (state='CA');

--- all inequality conditions rows/3 is the rules
--- numRows: 2 rawDataSize: 204
+-- inequality conditions falling out of range. total or zero (converted to one)
+-- numRows: 1 rawDataSize: 102
+-- numRows: 8 rawDataSize: 804
  explain select * from loc_orc where locid < 30;
  explain select * from loc_orc where locid > 30;
  explain select * from loc_orc where locid <= 30;
  explain select * from loc_orc where locid >= 30;
+
+-- all inequality conditions falling within range. rows/3 is the rules
+-- numRows: 2 rawDataSize: 204
+explain select * from loc_orc where locid < 3;
+explain select * from loc_orc where locid > 3;
+explain select * from loc_orc where locid <= 3;
+explain select * from loc_orc where locid >= 3;

http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index b09ad03..7e697f1 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -856,12 +856,14 @@ STAGE PLANS:
        Processor Tree:
          ListSink

-PREHOOK: query: -- all inequality conditions rows/3 is the rules
--- numRows: 2 rawDataSize: 204
+PREHOOK: query: -- inequality conditions falling out of range. total or zero (converted to one)
+-- numRows: 1 rawDataSize: 102
+-- numRows: 8 rawDataSize: 804
  explain select * from loc_orc where locid < 30
  PREHOOK: type: QUERY
-POSTHOOK: query: -- all inequality conditions rows/3 is the rules
--- numRows: 2 rawDataSize: 204
+POSTHOOK: query: -- inequality conditions falling out of range. total or zero (converted to one)
+-- numRows: 1 rawDataSize: 102
+-- numRows: 8 rawDataSize: 804
  explain select * from loc_orc where locid < 30
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
@@ -877,14 +879,14 @@ STAGE PLANS:
              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (locid < 30) (type: boolean)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -913,14 +915,14 @@ STAGE PLANS:
              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (locid > 30) (type: boolean)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -949,14 +951,14 @@ STAGE PLANS:
              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (locid <= 30) (type: boolean)
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -985,6 +987,154 @@ STAGE PLANS:
              Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (locid >= 30) (type: boolean)
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- all inequality conditions falling within range. rows/3 is the rules
+-- numRows: 2 rawDataSize: 204
+explain select * from loc_orc where locid < 3
+PREHOOK: type: QUERY
+POSTHOOK: query: -- all inequality conditions falling within range. rows/3 is the rules
+-- numRows: 2 rawDataSize: 204
+explain select * from loc_orc where locid < 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid < 3) (type: boolean)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from loc_orc where locid > 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from loc_orc where locid > 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid > 3) (type: boolean)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from loc_orc where locid <= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from loc_orc where locid <= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid <= 3) (type: boolean)
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from loc_orc where locid >= 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from loc_orc where locid >= 3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (locid >= 3) (type: boolean)
                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index ff95252..64a57fe 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -342,31 +342,31 @@ STAGE PLANS:
              Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (s_store_sk > 0) (type: boolean)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: s_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan
              alias: ss
              Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (ss_store_sk > 0) (type: boolean)
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: ss_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
        Reduce Operator Tree:
          Join Operator
            condition map:
@@ -375,10 +375,10 @@ STAGE PLANS:
              0 _col0 (type: int)
              1 _col0 (type: int)
            outputColumnNames: _col0
- Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
- Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -472,16 +472,16 @@ STAGE PLANS:
              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: s_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan
              alias: ss
              Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
@@ -505,10 +505,10 @@ STAGE PLANS:
              0 _col0 (type: int)
              1 _col0 (type: int)
            outputColumnNames: _col0
- Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
- Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -688,46 +688,46 @@ STAGE PLANS:
              Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (ss_store_sk > 1000) (type: boolean)
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: ss_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan
              alias: s
              Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (s_store_sk > 1000) (type: boolean)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: s_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan
              alias: s
              Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (s_store_sk > 1000) (type: boolean)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: s_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
        Reduce Operator Tree:
          Join Operator
            condition map:
@@ -738,14 +738,14 @@ STAGE PLANS:
              1 _col0 (type: int)
              2 _col0 (type: int)
            outputColumnNames: _col1
- Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: _col1 (type: int)
              outputColumnNames: _col0
- Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
- Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -789,16 +789,16 @@ STAGE PLANS:
              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: s_store_sk (type: int)
                  outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan
              alias: s
              Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
@@ -824,14 +824,14 @@ STAGE PLANS:
              1 _col0 (type: int)
              2 _col0 (type: int)
            outputColumnNames: _col1
- Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
            Select Operator
              expressions: _col1 (type: int)
              outputColumnNames: _col0
- Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
- Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 186f7af..131cf6a 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -493,11 +493,11 @@ STAGE PLANS:
            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
            Filter Operator
              predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: locid (type: int)
                outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                ListSink

  PREHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001'
@@ -517,11 +517,11 @@ STAGE PLANS:
            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
            Filter Operator
              predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: locid (type: int), '2001' (type: string)
                outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
                ListSink

  PREHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001'
@@ -541,10 +541,10 @@ STAGE PLANS:
            Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
            Filter Operator
              predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: locid (type: int), '2001' (type: string)
                outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE
                ListSink


http://git-wip-us.apache.org/repos/asf/hive/blob/f87b2b63/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index b7a8174..b501f97 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -388,9 +388,9 @@ Stage-0
                    PartitionCols:_col0, _col1
                    Group By Operator [GBY_35] (rows=1 width=20)
                      Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6
- Select Operator [SEL_34] (rows=2 width=16)
+ Select Operator [SEL_34] (rows=3 width=16)
                        Output:["_col2","_col6"]
- Filter Operator [FIL_33] (rows=2 width=16)
+ Filter Operator [FIL_33] (rows=3 width=16)
                          predicate:((_col1 > 0) or (_col6 >= 0))
                          Merge Join Operator [MERGEJOIN_52] (rows=3 width=16)
                            Conds:RS_30._col0=RS_31._col0(Inner),Output:["_col1","_col2","_col6"]
@@ -491,14 +491,14 @@ Stage-0
                        Output:["_col2","_col6"]
                        Filter Operator [FIL_30] (rows=1 width=16)
                          predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0))
- Merge Join Operator [MERGEJOIN_48] (rows=2 width=16)
+ Merge Join Operator [MERGEJOIN_48] (rows=3 width=16)
                            Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col6"]
                          <-Map 10 [SIMPLE_EDGE]
                            SHUFFLE [RS_28]
                              PartitionCols:_col0
- Select Operator [SEL_26] (rows=5 width=71)
+ Select Operator [SEL_26] (rows=18 width=79)
                                Output:["_col0","_col1"]
- Filter Operator [FIL_46] (rows=5 width=71)
+ Filter Operator [FIL_46] (rows=18 width=79)
                                  predicate:((c_int > 0) and key is not null)
                                  TableScan [TS_24] (rows=20 width=80)
                                    default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"]
@@ -664,14 +664,14 @@ Stage-0
                        Output:["_col2","_col6"]
                        Filter Operator [FIL_29] (rows=1 width=20)
                          predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0))
- Merge Join Operator [MERGEJOIN_42] (rows=3 width=20)
+ Merge Join Operator [MERGEJOIN_42] (rows=4 width=20)
                            Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"]
                          <-Map 10 [SIMPLE_EDGE]
                            SHUFFLE [RS_27]
                              PartitionCols:_col0
- Select Operator [SEL_24] (rows=6 width=74)
+ Select Operator [SEL_24] (rows=20 width=80)
                                Output:["_col0","_col1"]
- Filter Operator [FIL_41] (rows=6 width=74)
+ Filter Operator [FIL_41] (rows=20 width=80)
                                  predicate:(c_int > 0)
                                  TableScan [TS_22] (rows=20 width=80)
                                    default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"]
@@ -744,9 +744,9 @@ Stage-0
                PartitionCols:_col0, _col1
                Group By Operator [GBY_29] (rows=1 width=20)
                  Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6
- Select Operator [SEL_28] (rows=2 width=16)
+ Select Operator [SEL_28] (rows=3 width=16)
                    Output:["_col2","_col6"]
- Filter Operator [FIL_27] (rows=2 width=16)
+ Filter Operator [FIL_27] (rows=3 width=16)
                      predicate:((_col1 > 0) or (_col6 >= 0))
                      Merge Join Operator [MERGEJOIN_43] (rows=3 width=16)
                        Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col1","_col2","_col6"]
@@ -1201,9 +1201,9 @@ Stage-0
      Stage-1
        Reducer 3
        File Output Operator [FS_19]
- Select Operator [SEL_18] (rows=14 width=101)
+ Select Operator [SEL_18] (rows=21 width=101)
            Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_17] (rows=14 width=101)
+ Filter Operator [FIL_17] (rows=21 width=101)
              predicate:((_col1 > 0) or (_col6 >= 0))
              Merge Join Operator [MERGEJOIN_28] (rows=21 width=101)
                Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"]
@@ -1257,9 +1257,9 @@ Stage-0
      Stage-1
        Reducer 2
        File Output Operator [FS_14]
- Select Operator [SEL_13] (rows=12 width=101)
+ Select Operator [SEL_13] (rows=24 width=101)
            Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_12] (rows=12 width=101)
+ Filter Operator [FIL_12] (rows=24 width=101)
              predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2))
              Merge Join Operator [MERGEJOIN_19] (rows=72 width=101)
                Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"]
@@ -1487,9 +1487,9 @@ Stage-0
                      PartitionCols:_col0, _col1
                      Group By Operator [GBY_41] (rows=1 width=20)
                        Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6
- Select Operator [SEL_40] (rows=2 width=16)
+ Select Operator [SEL_40] (rows=3 width=16)
                          Output:["_col2","_col6"]
- Filter Operator [FIL_39] (rows=2 width=16)
+ Filter Operator [FIL_39] (rows=3 width=16)
                            predicate:((_col1 > 0) or (_col6 >= 0))
                            Merge Join Operator [MERGEJOIN_61] (rows=3 width=16)
                              Conds:RS_36._col0=RS_37._col0(Inner),Output:["_col1","_col2","_col6"]

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedMar 15, '16 at 9:44a
activeMar 15, '16 at 9:44a
posts1
users1
websitehive.apache.org

1 user in discussion

Jcamacho: 1 post

People

Translate

site design / logo © 2021 Grokbase