FAQ
Author: hashutosh
Date: Wed Jul 17 20:25:00 2013
New Revision: 1504265

URL: http://svn.apache.org/r1504265
Log:
HIVE-4787 : make vectorized LOWER(), UPPER(), LENGTH() work end-to-end; support expression input for vectorized LIKE (Eric Hanson via Ashutosh Chauhan)

Modified:
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Jul 17 20:25:00 2013
@@ -72,7 +72,9 @@ import org.apache.hadoop.hive.ql.plan.Ex
  import org.apache.hadoop.hive.ql.plan.api.OperatorType;
  import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
  import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLength;
  import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLower;
  import org.apache.hadoop.hive.ql.udf.UDFMinute;
  import org.apache.hadoop.hive.ql.udf.UDFMonth;
  import org.apache.hadoop.hive.ql.udf.UDFOPDivide;
@@ -83,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
  import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
  import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
  import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFUpper;
  import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
  import org.apache.hadoop.hive.ql.udf.UDFYear;
  import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -453,21 +456,80 @@ public class VectorizationContext {
        return getTimestampFieldExpression(cl.getSimpleName(), childExpr);
      } else if (cl.equals(UDFLike.class)) {
        return getLikeExpression(childExpr);
+ } else if (cl.equals(UDFLower.class)) {
+ return getUnaryStringExpression("StringLower", "String", childExpr);
+ } else if (cl.equals(UDFUpper.class)) {
+ return getUnaryStringExpression("StringUpper", "String", childExpr);
+ } else if (cl.equals(UDFLength.class)) {
+ return getUnaryStringExpression("StringLength", "Long", childExpr);
      }

      throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
    }

+ /* Return a unary string vector expression. This is used for functions like
+ * UPPER() and LOWER().
+ */
+ private VectorExpression getUnaryStringExpression(String vectorExprClassName,
+ String resultType, // result type name
+ List<ExprNodeDesc> childExprList) throws HiveException {
+
+ /* Create an instance of the class vectorExprClassName for the input column or expression result
+ * and return it.
+ */
+
+ ExprNodeDesc childExpr = childExprList.get(0);
+ int inputCol;
+ VectorExpression v1 = null;
+ if (childExpr instanceof ExprNodeGenericFuncDesc) {
+ v1 = getVectorExpression(childExpr);
+ inputCol = v1.getOutputColumn();
+ } else if (childExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
+ inputCol = getInputColumnIndex(colDesc.getColumn());
+ } else {
+ // TODO? add code to handle constant argument case
+ throw new HiveException("Expression not supported: "+childExpr);
+ }
+ String outputColumnType = getNormalizedTypeName(resultType);
+ int outputCol = ocm.allocateOutputColumn(outputColumnType);
+ String className = "org.apache.hadoop.hive.ql.exec.vector.expressions."
+ + vectorExprClassName;
+ VectorExpression expr;
+ try {
+ expr = (VectorExpression) Class.forName(className).
+ getDeclaredConstructors()[0].newInstance(inputCol, outputCol);
+ } catch (Exception ex) {
+ throw new HiveException(ex);
+ }
+ if (v1 != null) {
+ expr.setChildExpressions(new VectorExpression [] {v1});
+ ocm.freeOutputColumn(v1.getOutputColumn());
+ }
+ return expr;
+ }
+
    private VectorExpression getLikeExpression(List<ExprNodeDesc> childExpr) throws HiveException {
      ExprNodeDesc leftExpr = childExpr.get(0);
      ExprNodeDesc rightExpr = childExpr.get(1);

+ VectorExpression v1 = null;
      VectorExpression expr = null;
+ int inputCol;
+ ExprNodeConstantDesc constDesc;
+
      if ((leftExpr instanceof ExprNodeColumnDesc) &&
          (rightExpr instanceof ExprNodeConstantDesc) ) {
        ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol = getInputColumnIndex(leftColDesc.getColumn());
+ constDesc = (ExprNodeConstantDesc) rightExpr;
+ inputCol = getInputColumnIndex(leftColDesc.getColumn());
+ expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
+ new Text((byte[]) getScalarValue(constDesc)));
+ } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) &&
+ (rightExpr instanceof ExprNodeConstantDesc)) {
+ v1 = getVectorExpression(leftExpr);
+ inputCol = v1.getOutputColumn();
+ constDesc = (ExprNodeConstantDesc) rightExpr;
        expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
            new Text((byte[]) getScalarValue(constDesc)));
      }
@@ -475,6 +537,10 @@ public class VectorizationContext {
      if (expr == null) {
        throw new HiveException("Vector LIKE filter expression could not be initialized");
      }
+ if (v1 != null) {
+ expr.setChildExpressions(new VectorExpression [] {v1});
+ ocm.freeOutputColumn(v1.getOutputColumn());
+ }
      return expr;
    }


Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java Wed Jul 17 20:25:00 2013
@@ -264,6 +264,11 @@ public class FilterStringColLikeStringSc

    @Override
    public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
      BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
      int[] sel = batch.selected;
      boolean[] nullPos = inputColVector.isNull;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java Wed Jul 17 20:25:00 2013
@@ -26,7 +26,7 @@ public class StringLength extends Vector
    private int colNum;
    private int outputColumn;

- StringLength (int colNum, int outputColumn) {
+ public StringLength (int colNum, int outputColumn) {
      this.colNum = colNum;
      this.outputColumn = outputColumn;
    }
@@ -34,6 +34,11 @@ public class StringLength extends Vector
    // Calculate the length of the UTF-8 strings in input vector and place results in output vector.
    @Override
    public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
      BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
      LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
      int[] sel = batch.selected;
@@ -127,7 +132,7 @@ public class StringLength extends Vector

    @Override
    public String getOutputType() {
- return "String";
+ return "Long";
    }



Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java Wed Jul 17 20:25:00 2013
@@ -21,7 +21,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
  import org.apache.hadoop.hive.ql.udf.IUDFUnaryString;

  public class StringLower extends StringUnaryUDF {
- StringLower(int colNum, int outputColumn) {
+ public StringLower(int colNum, int outputColumn) {
      super(colNum, outputColumn, (IUDFUnaryString) new UDFLower());
    }
  }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java Wed Jul 17 20:25:00 2013
@@ -38,6 +38,11 @@ public class StringUnaryUDF extends Vect

    @Override
    public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
      BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
      int[] sel = batch.selected;
      int n = batch.size;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java?rev=1504265&r1=1504264&r2=1504265&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java Wed Jul 17 20:25:00 2013
@@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
  import org.apache.hadoop.hive.ql.udf.IUDFUnaryString;

  public class StringUpper extends StringUnaryUDF {
- StringUpper(int colNum, int outputColumn) {
+ public StringUpper(int colNum, int outputColumn) {
      super(colNum, outputColumn, (IUDFUnaryString) new UDFUpper());
    }
  }

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jul 17, '13 at 8:25p
active: Jul 17, '13 at 8:25p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Hashutosh: 1 post

People

Translate

site design / logo © 2021 Grokbase