FAQ
Author: ehans
Date: Fri Jan 3 18:27:45 2014
New Revision: 1555189

URL: http://svn.apache.org/r1555189
Log:
HIVE-5757: Implement vectorized support for CASE (Eric Hanson)

Added:
     hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q
     hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1555189&r1=1555188&r2=1555189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Jan 3 18:27:45 2014
@@ -293,7 +293,20 @@ public class VectorizationContext {
arg0Type(expr).equals("float"))) {
          return true;
        }
- } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) {
+ } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string"))
+
+ /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
+ * of their complexity and generality. In the future, variations of these
+ * can be optimized to run faster for the vectorized code path. For example,
+ * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END
+ * is an example of a GenericUDFCase that has all constant arguments
+ * except for the first argument. This is probably a common case and a
+ * good candidate for a fast, special-purpose VectorExpression. Then
+ * the UDF Adaptor code path could be used as a catch-all for
+ * non-optimized general cases.
+ */
+ || gudf instanceof GenericUDFCase
+ || gudf instanceof GenericUDFWhen) {
        return true;
      }
      return false;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1555189&r1=1555188&r2=1555189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 3 18:27:45 2014
@@ -123,6 +123,7 @@ import org.apache.hadoop.hive.ql.udf.gen
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor;
@@ -153,6 +154,7 @@ import org.apache.hadoop.hive.ql.udf.gen
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;

  public class Vectorizer implements PhysicalPlanResolver {

@@ -250,6 +252,8 @@ public class Vectorizer implements Physi
      supportedGenericUDFs.add(GenericUDFAbs.class);
      supportedGenericUDFs.add(GenericUDFBetween.class);
      supportedGenericUDFs.add(GenericUDFIn.class);
+ supportedGenericUDFs.add(GenericUDFCase.class);
+ supportedGenericUDFs.add(GenericUDFWhen.class);

      // For type casts
      supportedGenericUDFs.add(UDFToLong.class);

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q?rev=1555189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q Fri Jan 3 18:27:45 2014
@@ -0,0 +1,37 @@
+set hive.vectorized.execution.enabled = true
+;
+explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out?rev=1555189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out Fri Jan 3 18:27:45 2014
@@ -0,0 +1,120 @@
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_FUNCTION when (= (TOK_TABLE_OR_COL csmallint) 418) "a" (= (TOK_TABLE_OR_COL csmallint) 12205) "b" "c")) (TOK_SELEXPR (TOK_FUNCTION case (TOK_TABLE_OR_COL csmallint) 418 "a" 12205 "b" "c"))) (TOK_WHERE (or (or (= (TOK_TABLE_OR_COL csmallint) 418) (= (TOK_TABLE_OR_COL csmallint) 12205)) (= (TOK_TABLE_OR_COL csmallint) 10583)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ alltypesorc
+ TableScan
+ alias: alltypesorc
+ Filter Operator
+ predicate:
+ expr: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583))
+ type: boolean
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: csmallint
+ type: smallint
+ expr: CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END
+ type: string
+ expr: CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else "c"
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then "b"
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+12205 b b
+10583 c c
+418 a a
+12205 b b

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jan 3, '14 at 6:28p
active: Jan 3, '14 at 6:28p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ehans: 1 post

People

Translate

site design / logo © 2021 Grokbase