FAQ
Repository: hive
Updated Branches:
   refs/heads/master d7efa492e -> 664c5d561


HIVE-12941 : Unexpected result when using MIN() on struct with NULL in first field (Yongzhi Chen, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/664c5d56
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/664c5d56
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/664c5d56

Branch: refs/heads/master
Commit: 664c5d561ad74bcfe91404b382c52b9892b5dc86
Parents: d7efa49
Author: Yongzhi Chen <ychena@apache.org>
Authored: Thu Feb 4 10:18:22 2016 -0500
Committer: Yongzhi Chen <ychena@apache.org>
Committed: Thu Feb 11 09:20:10 2016 -0500

----------------------------------------------------------------------
  .../hive/ql/udf/generic/GenericUDAFMin.java | 4 +-
  .../queries/clientpositive/min_structvalue.q | 10 +++++
  .../clientpositive/min_structvalue.q.out | 45 ++++++++++++++++++++
  .../objectinspector/ObjectInspectorUtils.java | 42 +++++++++++++++---
  4 files changed, 95 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/664c5d56/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
index bde36e1..70e0db1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
@@ -28,7 +28,9 @@ import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
  import org.apache.hadoop.hive.ql.udf.UDFType;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax.MaxStreamingFixedWindow;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.FullMapEqualComparer;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -109,7 +111,7 @@ public class GenericUDAFMin extends AbstractGenericUDAFResolver {
          throws HiveException {
        if (partial != null) {
          MinAgg myagg = (MinAgg) agg;
- int r = ObjectInspectorUtils.compare(myagg.o, outputOI, partial, inputOI);
+ int r = ObjectInspectorUtils.compare(myagg.o, outputOI, partial, inputOI, new FullMapEqualComparer(), NullValueOption.MAXVALUE);
          if (myagg.o == null || r > 0) {
            myagg.o = ObjectInspectorUtils.copyToStandardObject(partial, inputOI,
                ObjectInspectorCopyOption.JAVA);

http://git-wip-us.apache.org/repos/asf/hive/blob/664c5d56/ql/src/test/queries/clientpositive/min_structvalue.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/min_structvalue.q b/ql/src/test/queries/clientpositive/min_structvalue.q
new file mode 100644
index 0000000..4431a0d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/min_structvalue.q
@@ -0,0 +1,10 @@
+select max(a), min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(null as int)) as a) tmp;
+
+select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",-2) as a union all select named_struct("field",cast(null as int)) as a) tmp;
+
+select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(5 as int)) as a) tmp;
+
+select min(a) FROM (select named_struct("field",1, "secf", cast(null as int) ) as a union all select named_struct("field",2, "secf", 3) as a union all select named_struct("field",cast(5 as int), "secf", 4) as a) tmp;
+
+select min(a) FROM (select named_struct("field",1, "secf", 2) as a union all select named_struct("field",-2, "secf", 3) as a union all select named_struct("field",cast(null as int), "secf", 1) as a) tmp;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/664c5d56/ql/src/test/results/clientpositive/min_structvalue.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/min_structvalue.q.out b/ql/src/test/results/clientpositive/min_structvalue.q.out
new file mode 100644
index 0000000..3582837
--- /dev/null
+++ b/ql/src/test/results/clientpositive/min_structvalue.q.out
@@ -0,0 +1,45 @@
+PREHOOK: query: select max(a), min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(null as int)) as a) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select max(a), min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(null as int)) as a) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"field":2} {"field":1}
+PREHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",-2) as a union all select named_struct("field",cast(null as int)) as a) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",-2) as a union all select named_struct("field",cast(null as int)) as a) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"field":-2}
+PREHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(5 as int)) as a) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select min(a) FROM (select named_struct("field",1) as a union all select named_struct("field",2) as a union all select named_struct("field",cast(5 as int)) as a) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"field":1}
+PREHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", cast(null as int) ) as a union all select named_struct("field",2, "secf", 3) as a union all select named_struct("field",cast(5 as int), "secf", 4) as a) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", cast(null as int) ) as a union all select named_struct("field",2, "secf", 3) as a union all select named_struct("field",cast(5 as int), "secf", 4) as a) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"field":1,"secf":null}
+PREHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", 2) as a union all select named_struct("field",-2, "secf", 3) as a union all select named_struct("field",cast(null as int), "secf", 1) as a) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select min(a) FROM (select named_struct("field",1, "secf", 2) as a union all select named_struct("field",-2, "secf", 3) as a union all select named_struct("field",cast(null as int), "secf", 1) as a) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"field":-2,"secf":3}

http://git-wip-us.apache.org/repos/asf/hive/blob/664c5d56/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 33e5357..c58e8ed 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -106,6 +106,17 @@ public final class ObjectInspectorUtils {
    }

    /**
+ * This enum controls how we interpret null value when compare two objects.
+ *
+ * MINVALUE means treating null value as the minimum value.
+ * MAXVALUE means treating null value as the maximum value.
+ *
+ */
+ public enum NullValueOption {
+ MINVALUE, MAXVALUE
+ }
+
+ /**
     * Calculates the hash code for array of Objects that contains writables. This is used
     * to work around the buggy Hadoop DoubleWritable hashCode implementation. This should
     * only be used for process-local hash codes; don't replace stored hash codes like bucketing.
@@ -762,17 +773,38 @@ public final class ObjectInspectorUtils {

    /**
     * Compare two objects with their respective ObjectInspectors.
+ * Treat null as minimum value.
     */
    public static int compare(Object o1, ObjectInspector oi1, Object o2,
        ObjectInspector oi2, MapEqualComparer mapEqualComparer) {
+ return compare(o1, oi1, o2, oi2, mapEqualComparer, NullValueOption.MINVALUE);
+ }
+
+ /**
+ * Compare two objects with their respective ObjectInspectors.
+ * if nullValueOpt is MAXVALUE, treat null as maximum value.
+ * if nullValueOpt is MINVALUE, treat null as minimum value.
+ */
+ public static int compare(Object o1, ObjectInspector oi1, Object o2,
+ ObjectInspector oi2, MapEqualComparer mapEqualComparer, NullValueOption nullValueOpt) {
      if (oi1.getCategory() != oi2.getCategory()) {
        return oi1.getCategory().compareTo(oi2.getCategory());
      }

+ int nullCmpRtn = -1;
+ switch (nullValueOpt) {
+ case MAXVALUE:
+ nullCmpRtn = 1;
+ break;
+ case MINVALUE:
+ nullCmpRtn = -1;
+ break;
+ }
+
      if (o1 == null) {
- return o2 == null ? 0 : -1;
+ return o2 == null ? 0 : nullCmpRtn;
      } else if (o2 == null) {
- return 1;
+ return -nullCmpRtn;
      }

      switch (oi1.getCategory()) {
@@ -915,7 +947,7 @@ public final class ObjectInspectorUtils {
          int r = compare(soi1.getStructFieldData(o1, fields1.get(i)), fields1
              .get(i).getFieldObjectInspector(), soi2.getStructFieldData(o2,
              fields2.get(i)), fields2.get(i).getFieldObjectInspector(),
- mapEqualComparer);
+ mapEqualComparer, nullValueOpt);
          if (r != 0) {
            return r;
          }
@@ -930,7 +962,7 @@ public final class ObjectInspectorUtils {
          int r = compare(loi1.getListElement(o1, i), loi1
              .getListElementObjectInspector(), loi2.getListElement(o2, i), loi2
              .getListElementObjectInspector(),
- mapEqualComparer);
+ mapEqualComparer, nullValueOpt);
          if (r != 0) {
            return r;
          }
@@ -955,7 +987,7 @@ public final class ObjectInspectorUtils {
        return compare(uoi1.getField(o1),
            uoi1.getObjectInspectors().get(tag1),
            uoi2.getField(o2), uoi2.getObjectInspectors().get(tag2),
- mapEqualComparer);
+ mapEqualComparer, nullValueOpt);
      }
      default:
        throw new RuntimeException("Compare on unknown type: "

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedFeb 11, '16 at 2:22p
activeFeb 11, '16 at 2:22p
posts1
users1
websitehive.apache.org

1 user in discussion

Ychena: 1 post

People

Translate

site design / logo © 2021 Grokbase