FAQ
Repository: hive
Updated Branches:
   refs/heads/master 860ba06fb -> 4f90a7156


HIVE-9534: incorrect result set for query that projects a windowed aggregate (Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4f90a715
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4f90a715
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4f90a715

Branch: refs/heads/master
Commit: 4f90a71565d47f4b9d8ce267ace206d6a1f199fa
Parents: 860ba06
Author: Aihua Xu <aihuaxu@apache.org>
Authored: Thu Feb 4 10:57:16 2016 -0500
Committer: Aihua Xu <aihuaxu@apache.org>
Committed: Fri Feb 12 15:21:31 2016 -0500

----------------------------------------------------------------------
  .../calcite/functions/CanAggregateDistinct.java | 27 +++++++++++++++++
  .../functions/HiveSqlCountAggFunction.java | 3 +-
  .../functions/HiveSqlSumAggFunction.java | 4 +--
  .../translator/SqlFunctionConverter.java | 29 ++++++++++--------
  .../hive/ql/udf/generic/GenericUDAFAverage.java | 31 ++++++++++++++++++--
  .../queries/clientpositive/windowing_distinct.q | 8 +++++
  .../clientpositive/windowing_distinct.q.out | 26 ++++++++++++++++
  7 files changed, 111 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
new file mode 100644
index 0000000..c24f3c0
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/CanAggregateDistinct.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+/**
+ * This is the UDAF interface to support DISTINCT function.
+ *
+ */
+public interface CanAggregateDistinct {
+ boolean isDistinct();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
index 58191e5..bc48707 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java
@@ -30,7 +30,7 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference;
  import org.apache.calcite.sql.type.SqlTypeName;
  import org.apache.calcite.util.ImmutableIntList;

-public class HiveSqlCountAggFunction extends SqlAggFunction {
+public class HiveSqlCountAggFunction extends SqlAggFunction implements CanAggregateDistinct {

    final boolean isDistinct;
    final SqlReturnTypeInference returnTypeInference;
@@ -52,6 +52,7 @@ public class HiveSqlCountAggFunction extends SqlAggFunction {
      this.operandTypeInference = operandTypeInference;
    }

+ @Override
    public boolean isDistinct() {
      return isDistinct;
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
index 498cd0e..dc286a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -46,7 +46,7 @@ import com.google.common.collect.ImmutableList;
   * <code>long</code>, <code>float</code>, <code>double</code>), and the result
   * is the same type.
   */
-public class HiveSqlSumAggFunction extends SqlAggFunction {
+public class HiveSqlSumAggFunction extends SqlAggFunction implements CanAggregateDistinct{
    final boolean isDistinct;
    final SqlReturnTypeInference returnTypeInference;
    final SqlOperandTypeInference operandTypeInference;
@@ -70,7 +70,7 @@ public class HiveSqlSumAggFunction extends SqlAggFunction {
    }

    //~ Methods ----------------------------------------------------------------
-
+ @Override
    public boolean isDistinct() {
      return isDistinct;
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 19aa414..b4c6e05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
  import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
  import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.CanAggregateDistinct;
  import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
  import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
  import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
@@ -217,24 +218,20 @@ public class SqlFunctionConverter {
          } else if (op.kind == SqlKind.PLUS_PREFIX) {
            node = (ASTNode) ParseDriver.adaptor.create(HiveParser.PLUS, "PLUS");
          } else {
- // Handle 'COUNT' function for the case of COUNT(*) and COUNT(DISTINCT)
- if (op instanceof HiveSqlCountAggFunction) {
+ // Handle COUNT/SUM/AVG function for the case of COUNT(*) and COUNT(DISTINCT)
+ if (op instanceof HiveSqlCountAggFunction ||
+ op instanceof HiveSqlSumAggFunction ||
+ (op instanceof CalciteUDAF && op.getName().equalsIgnoreCase(SqlStdOperatorTable.AVG.getName()))) {
              if (children.size() == 0) {
                node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR,
                  "TOK_FUNCTIONSTAR");
              } else {
- HiveSqlCountAggFunction countFunction = (HiveSqlCountAggFunction)op;
- if (countFunction.isDistinct()) {
+ CanAggregateDistinct distinctFunction = (CanAggregateDistinct) op;
+ if (distinctFunction.isDistinct()) {
                  node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONDI,
                      "TOK_FUNCTIONDI");
                }
              }
- } else if (op instanceof HiveSqlSumAggFunction) { // case SUM(DISTINCT)
- HiveSqlSumAggFunction sumFunction = (HiveSqlSumAggFunction) op;
- if (sumFunction.isDistinct()) {
- node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONDI,
- "TOK_FUNCTIONDI");
- }
            }
            node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName()));
          }
@@ -364,11 +361,18 @@ public class SqlFunctionConverter {
    }

    // UDAF is assumed to be deterministic
- public static class CalciteUDAF extends SqlAggFunction {
- public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference,
+ public static class CalciteUDAF extends SqlAggFunction implements CanAggregateDistinct {
+ private boolean isDistinct;
+ public CalciteUDAF(boolean isDistinct, String opName, SqlReturnTypeInference returnTypeInference,
          SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
        super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference,
            operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION);
+ this.isDistinct = isDistinct;
+ }
+
+ @Override
+ public boolean isDistinct() {
+ return isDistinct;
      }
    }

@@ -466,6 +470,7 @@ public class SqlFunctionConverter {
            break;
          default:
            calciteAggFn = new CalciteUDAF(
+ isDistinct,
                udfInfo.udfName,
                udfInfo.returnTypeInference,
                udfInfo.operandTypeInference,

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
index cd2449f..3c1ce26 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
@@ -26,8 +26,6 @@ import org.apache.hadoop.hive.ql.exec.Description;
  import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
-import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef;
  import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
  import org.apache.hadoop.hive.ql.util.JavaDataModel;
@@ -35,9 +33,11 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable;
  import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
  import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.StructField;
  import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
@@ -95,6 +95,19 @@ public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
      }
    }

+ @Override
+ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo)
+ throws SemanticException {
+ if (paramInfo.isAllColumns()) {
+ throw new SemanticException(
+ "The specified syntax for UDAF invocation is invalid.");
+ }
+
+ AbstractGenericUDAFAverageEvaluator eval =
+ (AbstractGenericUDAFAverageEvaluator) getEvaluator(paramInfo.getParameters());
+ eval.avgDistinct = paramInfo.isDistinct();
+ return eval;
+ }

    public static class GenericUDAFAverageEvaluatorDouble extends AbstractGenericUDAFAverageEvaluator<Double> {

@@ -102,6 +115,7 @@ public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
      public void doReset(AverageAggregationBuffer<Double> aggregation) throws HiveException {
        aggregation.count = 0;
        aggregation.sum = new Double(0);
+ aggregation.previousValue = null;
      }

      @Override
@@ -319,15 +333,18 @@ public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
    }

    private static class AverageAggregationBuffer<TYPE> implements AggregationBuffer {
+ private Object previousValue;
      private long count;
      private TYPE sum;
    };

    @SuppressWarnings("unchecked")
    public static abstract class AbstractGenericUDAFAverageEvaluator<TYPE> extends GenericUDAFEvaluator {
+ protected boolean avgDistinct;

      // For PARTIAL1 and COMPLETE
      protected transient PrimitiveObjectInspector inputOI;
+ protected transient ObjectInspector copiedOI;
      // For PARTIAL2 and FINAL
      private transient StructObjectInspector soi;
      private transient StructField countField;
@@ -359,6 +376,8 @@ public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
        // init input
        if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
          inputOI = (PrimitiveObjectInspector) parameters[0];
+ copiedOI = ObjectInspectorUtils.getStandardObjectInspector(inputOI,
+ ObjectInspectorCopyOption.JAVA);
        } else {
          soi = (StructObjectInspector) parameters[0];
          countField = soi.getStructFieldRef("count");
@@ -412,6 +431,14 @@ public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
        if (parameter != null) {
          AverageAggregationBuffer<TYPE> averageAggregation = (AverageAggregationBuffer<TYPE>) aggregation;
          try {
+ // Skip the same value if avgDistinct is true
+ if (this.avgDistinct &&
+ ObjectInspectorUtils.compare(parameter, inputOI, averageAggregation.previousValue, copiedOI) == 0) {
+ return;
+ }
+ averageAggregation.previousValue = ObjectInspectorUtils.copyToStandardObject(
+ parameter, inputOI, ObjectInspectorCopyOption.JAVA);
+
            doIterate(averageAggregation, inputOI, parameter);
          } catch (NumberFormatException e) {
            if (!warned) {

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/test/queries/clientpositive/windowing_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_distinct.q b/ql/src/test/queries/clientpositive/windowing_distinct.q
index 9f6ddfd..bb192a7 100644
--- a/ql/src/test/queries/clientpositive/windowing_distinct.q
+++ b/ql/src/test/queries/clientpositive/windowing_distinct.q
@@ -36,3 +36,11 @@ SELECT SUM(DISTINCT t) OVER (PARTITION BY index),
         SUM(DISTINCT ts) OVER (PARTITION BY index),
         SUM(DISTINCT dec) OVER (PARTITION BY index)
  FROM windowing_distinct;
+
+SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+ AVG(DISTINCT d) OVER (PARTITION BY index),
+ AVG(DISTINCT s) OVER (PARTITION BY index),
+ AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+ AVG(DISTINCT ts) OVER (PARTITION BY index),
+ AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct;

http://git-wip-us.apache.org/repos/asf/hive/blob/4f90a715/ql/src/test/results/clientpositive/windowing_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_distinct.q.out b/ql/src/test/results/clientpositive/windowing_distinct.q.out
index 0858f0f..074a594 100644
--- a/ql/src/test/results/clientpositive/windowing_distinct.q.out
+++ b/ql/src/test/results/clientpositive/windowing_distinct.q.out
@@ -102,3 +102,29 @@ POSTHOOK: Input: default@windowing_distinct
  235 77.42 0.0 0.0 2.724315837406612E9 69
  235 77.42 0.0 0.0 2.724315837406612E9 69
  235 77.42 0.0 0.0 2.724315837406612E9 69
+PREHOOK: query: SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+ AVG(DISTINCT d) OVER (PARTITION BY index),
+ AVG(DISTINCT s) OVER (PARTITION BY index),
+ AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+ AVG(DISTINCT ts) OVER (PARTITION BY index),
+ AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@windowing_distinct
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT AVG(DISTINCT t) OVER (PARTITION BY index),
+ AVG(DISTINCT d) OVER (PARTITION BY index),
+ AVG(DISTINCT s) OVER (PARTITION BY index),
+ AVG(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index),
+ AVG(DISTINCT ts) OVER (PARTITION BY index),
+ AVG(DISTINCT dec) OVER (PARTITION BY index)
+FROM windowing_distinct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@windowing_distinct
+#### A masked pattern was here ####
+27.0 28.315 NULL NULL 1.362157918703148E9 28.5000
+27.0 28.315 NULL NULL 1.362157918703148E9 28.5000
+27.0 28.315 NULL NULL 1.362157918703148E9 28.5000
+117.5 38.71 NULL NULL 1.362157918703306E9 34.5000
+117.5 38.71 NULL NULL 1.362157918703306E9 34.5000
+117.5 38.71 NULL NULL 1.362157918703306E9 34.5000

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedFeb 12, '16 at 8:23p
activeFeb 12, '16 at 8:23p
posts1
users1
websitehive.apache.org

1 user in discussion

Aihuaxu: 1 post

People

Translate

site design / logo © 2021 Grokbase