FAQ
Author: jdere
Date: Tue Sep 23 01:56:03 2014
New Revision: 1626926

URL: http://svn.apache.org/r1626926
Log:
HIVE-8052: Vectorization: min() on TimeStamp datatype fails with error "Vector aggregate not implemented: min for type: TIMESTAMP" (Matt McCline via Jason Dere)

Added:
     hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
Modified:
     hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
     hive/trunk/itests/src/test/resources/testconfiguration.properties
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
     hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q
     hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
     hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
     hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
     hive/trunk/ql/src/test/results/clientpositive/vectorization_0.q.out
     hive/trunk/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
     hive/trunk/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Tue Sep 23 01:56:03 2014
@@ -1712,6 +1712,9 @@ public class HiveConf extends Configurat
      HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true,
              "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" +
              "The default value is true."),
+ HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true,
+ "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" +
+ "The default value is true."),
      HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000,
          "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."),
      HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000,

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Sep 23 01:56:03 2014
@@ -158,6 +158,7 @@ minitez.query.files.shared=alter_merge_2
    vector_left_outer_join.q,\
    vector_mapjoin_reduce.q,\
    vector_string_concat.q,\
+ vectorization_0.q,\
    vectorization_12.q,\
    vectorization_13.q,\
    vectorization_14.q,\

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java Tue Sep 23 01:56:03 2014
@@ -67,6 +67,7 @@ public class VectorExpressionDescriptor
      DATE (0x040),
      TIMESTAMP (0x080),
      DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
+ INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value),
      INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value),
      STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value),
      ALL_FAMILY (0xFFF);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Sep 23 01:56:03 2014
@@ -1889,47 +1889,47 @@ public class VectorizationContext {
    // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively
    // marked map-side (HASH).
    static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{
- add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMinLong.class));
- add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class));
- add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class));
- add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class));
- add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMaxLong.class));
- add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class));
- add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class));
- add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
- add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
- add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class));
- add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class));
- add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class));
- add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
- add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
- add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
- add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
- add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
- add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
- add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
- add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
- add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
- add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
- add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
- add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
- add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
- add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
- add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
- add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
- add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
- add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMinLong.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class));
+ add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMaxLong.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class));
+ add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class));
+ add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
+ add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+ add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+ add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
+ add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
+ add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+ add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
+ add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
    }};

    public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce)

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Sep 23 01:56:03 2014
@@ -791,6 +791,7 @@ public class Vectorizer implements Physi

    boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean isTez) {
      boolean ret = false;
+ LOG.info("Validating MapWork operator " + op.getType().name());
      switch (op.getType()) {
        case MAPJOIN:
          if (op instanceof MapJoinOperator) {
@@ -827,6 +828,7 @@ public class Vectorizer implements Physi

    boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
      boolean ret = false;
+ LOG.info("Validating ReduceWork operator " + op.getType().name());
      switch (op.getType()) {
        case EXTRACT:
          ret = validateExtractOperator((ExtractOperator) op);
@@ -840,7 +842,12 @@ public class Vectorizer implements Physi
          }
          break;
        case GROUPBY:
- ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ if (HiveConf.getBoolVar(physicalContext.getConf(),
+ HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
+ ret = validateGroupByOperator((GroupByOperator) op, true, true);
+ } else {
+ ret = false;
+ }
          break;
        case FILTER:
          ret = validateFilterOperator((FilterOperator) op);
@@ -1071,11 +1078,11 @@ public class Vectorizer implements Physi
        VectorizationContext vc = new ValidatorVectorizationContext();
        if (vc.getVectorExpression(desc, mode) == null) {
          // TODO: this cannot happen - VectorizationContext throws in such cases.
- LOG.info("getVectorExpression returned null");
+ LOG.debug("getVectorExpression returned null");
          return false;
        }
      } catch (Exception e) {
- LOG.info("Failed to vectorize", e);
+ LOG.debug("Failed to vectorize", e);
        return false;
      }
      return true;
@@ -1098,19 +1105,19 @@ public class Vectorizer implements Physi
      if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
        return false;
      }
- if (aggDesc.getParameters() != null) {
- return validateExprNodeDesc(aggDesc.getParameters());
+ if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+ return false;
      }
      // See if we can vectorize the aggregation.
      try {
        VectorizationContext vc = new ValidatorVectorizationContext();
        if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
          // TODO: this cannot happen - VectorizationContext throws in such cases.
- LOG.info("getAggregatorExpression returned null");
+ LOG.debug("getAggregatorExpression returned null");
          return false;
        }
      } catch (Exception e) {
- LOG.info("Failed to vectorize", e);
+ LOG.debug("Failed to vectorize", e);
        return false;
      }
      return true;

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_0.q Tue Sep 23 01:56:03 2014
@@ -1,4 +1,180 @@
  SET hive.vectorized.execution.enabled=true;
+
+-- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1;
+
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1;
+
+EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))));
+
  SELECT AVG(cbigint),
         (-(AVG(cbigint))),
         (-6432 + AVG(cbigint)),

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_date_funcs.q Tue Sep 23 01:56:03 2014
@@ -122,4 +122,20 @@ SELECT
  FROM date_udf_flight_orc LIMIT 10;

  -- Test extracting the date part of expression that includes time
-SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
\ No newline at end of file
+SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
+
+EXPLAIN SELECT
+ min(fl_date) AS c1,
+ max(fl_date),
+ count(fl_date),
+ count(*)
+FROM date_udf_flight_orc
+ORDER BY c1;
+
+SELECT
+ min(fl_date) AS c1,
+ max(fl_date),
+ count(fl_date),
+ count(*)
+FROM date_udf_flight_orc
+ORDER BY c1;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q?rev=1626926&r1=1626925&r2=1626926&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q Tue Sep 23 01:56:03 2014
@@ -1,6 +1,7 @@
-SET hive.vectorized.execution.enabled = true;
-
  -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end.
+-- Turning on vectorization has been temporarily moved after filling the test table
+-- due to bug HIVE-8197.
+

  CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC;

@@ -11,6 +12,8 @@ SELECT
  FROM alltypesorc
  LIMIT 40;

+SET hive.vectorized.execution.enabled = true;
+
  CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC;

  INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1;
@@ -122,3 +125,48 @@ SELECT
    second(stimestamp1)
  FROM alltypesorc_wrong
  ORDER BY c1;
+
+EXPLAIN SELECT
+ min(ctimestamp1),
+ max(ctimestamp1),
+ count(ctimestamp1),
+ count(*)
+FROM alltypesorc_string;
+
+SELECT
+ min(ctimestamp1),
+ max(ctimestamp1),
+ count(ctimestamp1),
+ count(*)
+FROM alltypesorc_string;
+
+-- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)...
+EXPLAIN SELECT
+ sum(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ sum(ctimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+ avg(ctimestamp1),
+ variance(ctimestamp1),
+ var_pop(ctimestamp1),
+ var_samp(ctimestamp1),
+ std(ctimestamp1),
+ stddev(ctimestamp1),
+ stddev_pop(ctimestamp1),
+ stddev_samp(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ avg(ctimestamp1),
+ variance(ctimestamp1),
+ var_pop(ctimestamp1),
+ var_samp(ctimestamp1),
+ std(ctimestamp1),
+ stddev(ctimestamp1),
+ stddev_pop(ctimestamp1),
+ stddev_samp(ctimestamp1)
+FROM alltypesorc_string;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out?rev=1626926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_0.q.out Tue Sep 23 01:56:03 2014
@@ -0,0 +1,1127 @@
+PREHOOK: query: -- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Use ORDER BY clauses to generate 2 stages.
+EXPLAIN
+SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ctinyint) as c1,
+ MAX(ctinyint),
+ COUNT(ctinyint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64 62 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(ctinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(ctinyint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-39856
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: struct<count:bigint,sum:double,input:tinyint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(ctinyint) as c1,
+ variance(ctinyint),
+ var_pop(ctinyint),
+ var_samp(ctinyint),
+ std(ctinyint),
+ stddev(ctinyint),
+ stddev_pop(ctinyint),
+ stddev_samp(ctinyint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536
+PREHOOK: query: EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(cbigint), max(cbigint), count(cbigint), count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(cbigint) as c1,
+ MAX(cbigint),
+ COUNT(cbigint),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-2147311592 2145498388 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(cbigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(cbigint) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1698460028409
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: cbigint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(cbigint) as c1,
+ variance(cbigint),
+ var_pop(cbigint),
+ var_samp(cbigint),
+ std(cbigint),
+ stddev(cbigint),
+ stddev_pop(cbigint),
+ stddev_samp(cbigint)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9
+PREHOOK: query: EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(cfloat), max(cfloat), count(cfloat), count()
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: float)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(cfloat) as c1,
+ MAX(cfloat),
+ COUNT(cfloat),
+ COUNT(*)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64.0 79.553 9173 12288
+PREHOOK: query: EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(cfloat)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(cfloat) as c1
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-39479.635992884636
+PREHOOK: query: EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cfloat (type: float)
+ outputColumnNames: cfloat
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: struct<count:bigint,sum:double,input:float>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ avg(cfloat) as c1,
+ variance(cfloat),
+ var_pop(cfloat),
+ var_samp(cfloat),
+ std(cfloat),
+ stddev(cfloat),
+ stddev_pop(cfloat),
+ stddev_samp(cfloat)
+FROM alltypesorc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or (cbigint < cdouble))) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint)
+ outputColumnNames: cbigint, cfloat, ctinyint
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), (- _col0) (type: double), (-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2 (type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 + _col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(cbigint),
+ (-(AVG(cbigint))),
+ (-6432 + AVG(cbigint)),
+ STDDEV_POP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))),
+ VAR_SAMP(cbigint),
+ (-((-6432 + AVG(cbigint)))),
+ (-6432 + (-((-6432 + AVG(cbigint))))),
+ (-((-6432 + AVG(cbigint)))),
+ ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))),
+ COUNT(*),
+ SUM(cfloat),
+ (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)),
+ (-(VAR_SAMP(cbigint))),
+ ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))),
+ MIN(ctinyint),
+ (-(MIN(ctinyint)))
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%b%')
+ OR ((79.553 != cint)
+ OR (cbigint < cdouble)))
+ OR ((ctinyint >= csmallint)
+ AND ((cboolean2 = 1)
+ AND (3569 = ctinyint))))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64

Search Discussions

Discussion Posts

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 2 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedSep 23, '14 at 1:56a
activeSep 23, '14 at 1:56a
posts2
users1
websitehive.apache.org

1 user in discussion

Jdere: 2 posts

People

Translate

site design / logo © 2021 Grokbase