FAQ
Repository: hive
Updated Branches:
   refs/heads/master 570dea850 -> 9fc5d2c01


HIVE-12209: Vectorize simple UDFs with null arguments (Gopal V, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9fc5d2c0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9fc5d2c0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9fc5d2c0

Branch: refs/heads/master
Commit: 9fc5d2c0178f3f1fa26a0f372f5cc731aeac430a
Parents: 570dea8
Author: Gopal V <gopalv@apache.org>
Authored: Mon Nov 2 19:42:35 2015 -0800
Committer: Gopal V <gopalv@apache.org>
Committed: Mon Nov 2 19:42:35 2015 -0800

----------------------------------------------------------------------
  .../ql/exec/vector/VectorizationContext.java | 7 +-
  .../ql/exec/vector/udf/VectorUDFArgDesc.java | 19 ++--
  .../queries/clientpositive/vectorized_case.q | 19 ++++
  .../clientpositive/spark/vectorized_case.q.out | 109 +++++++++++++++++--
  .../clientpositive/tez/vectorized_case.q.out | 109 +++++++++++++++++--
  .../clientpositive/vectorized_case.q.out | 69 ++++++++++++
  6 files changed, 301 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 3489c9c..e7a829e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2022,12 +2022,7 @@ public class VectorizationContext {
          variableArgPositions.add(i);
          argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
        } else if (child instanceof ExprNodeConstantDesc) {
- if (((ExprNodeConstantDesc) child).getValue() == null) {
- // cannot handle constant null at the moment
- throw new HiveException("Unable to vectorize custom UDF. Custom udf containing "
- + "constant null argument cannot be currently vectorized.");
- }
- // this is a constant
+ // this is a constant (or null)
          argDescs[i].setConstant((ExprNodeConstantDesc) child);
        } else {
          throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : "

http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
index e113980..6abfe63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
@@ -59,13 +59,18 @@ public class VectorUDFArgDesc implements Serializable {
     * during initialization.
     */
    public void prepareConstant() {
- PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
- .getPrimitiveCategory();
-
- // Convert from Java to Writable
- Object writableValue = PrimitiveObjectInspectorFactory
- .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
- constExpr.getValue());
+ final Object writableValue;
+ if (constExpr != null) {
+ PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
+ .getPrimitiveCategory();
+
+ // Convert from Java to Writable
+ writableValue = PrimitiveObjectInspectorFactory
+ .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
+ constExpr.getValue());
+ } else {
+ writableValue = null;
+ }

      constObjVal = new GenericUDF.DeferredJavaObject(writableValue);
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/test/queries/clientpositive/vectorized_case.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q
index 8799fbb..e74bf82 100644
--- a/ql/src/test/queries/clientpositive/vectorized_case.q
+++ b/ql/src/test/queries/clientpositive/vectorized_case.q
@@ -1,4 +1,5 @@
  set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
  set hive.vectorized.execution.enabled = true
  ;
  explain
@@ -36,3 +37,21 @@ where csmallint = 418
  or csmallint = 12205
  or csmallint = 10583
  ;
+explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index c2250e6..ade9cfe 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
  or csmallint = 10583
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1

  STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
    Stage: Stage-0
      Fetch Operator
        limit: -1
        Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
- Select Operator
- expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink

  PREHOOK: query: select
    csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
  10583 c c
  418 a a
  12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index c2250e6..136714d 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
  or csmallint = 10583
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1

  STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
    Stage: Stage-0
      Fetch Operator
        limit: -1
        Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
- Select Operator
- expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink

  PREHOOK: query: select
    csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
  10583 c c
  418 a a
  12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/9fc5d2c0/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 73bf12d..347a93e 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -109,3 +109,72 @@ POSTHOOK: Input: default@alltypesorc
  10583 c c
  418 a a
  12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Nov 3, '15 at 3:43a
active: Nov 3, '15 at 3:43a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Gopalv: 1 post

People

Translate

site design / logo © 2021 Grokbase