FAQ
Author: hashutosh
Date: Tue Sep 17 21:13:53 2013
New Revision: 1524226

URL: http://svn.apache.org/r1524226
Log:
HIVE-4961 : Create bridge for custom UDFs to operate in vectorized mode (Eric Hanson via Ashutosh Chauhan)

Added:
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
     hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
Modified:
     hive/branches/vectorization/build-common.xml
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
     hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java

Modified: hive/branches/vectorization/build-common.xml
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/build-common.xml?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/build-common.xml (original)
+++ hive/branches/vectorization/build-common.xml Tue Sep 17 21:13:53 2013
@@ -478,7 +478,7 @@
        <batchtest todir="${test.build.dir}" unless="testcase">
          <fileset dir="${test.build.classes}"
                   includes="**/${test.include}.class"
- excludes="**/ql/exec/vector/util/*.class,**/TestSerDe.class,**/TestHiveMetaStore.class,**/TestBeeLineDriver.class,**/TestHiveServer2Concurrency.class,**/*$*.class,${test.junit.exclude}" />
+ excludes="**/ql/exec/vector/util/*.class,**/ql/exec/vector/udf/legacy/*.class,**/ql/exec/vector/udf/generic/*.class,**/TestSerDe.class,**/TestHiveMetaStore.class,**/TestBeeLineDriver.class,**/TestHiveServer2Concurrency.class,**/*$*.class,${test.junit.exclude}" />
        </batchtest>
        <batchtest todir="${test.build.dir}" if="testcase">
          <fileset dir="${test.build.classes}" includes="**/${testcase}.class"/>

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Sep 17 21:13:53 2013
@@ -31,6 +31,8 @@ import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
  import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
  import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression;
@@ -64,6 +66,8 @@ import org.apache.hadoop.hive.ql.exec.ve
  import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.ql.plan.AggregationDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -141,6 +145,17 @@ public class VectorizationContext {
      }
    }

+ /* Return true if we are running in the planner, and false if we
+ * are running in a task.
+ */
+ /*
+ private boolean isPlanner() {
+
+ // This relies on the behavior that columnMap is null in the planner.
+ return columnMap == null;
+ }
+ */
+
    private class OutputColumnManager {
      private final int initialOutputCol;
      private int outputColCount = 0;
@@ -243,8 +258,12 @@ public class VectorizationContext {
        ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
      } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
- ve = getVectorExpression(expr.getGenericUDF(),
- expr.getChildExprs());
+ if (isCustomUDF(expr)) {
+ ve = getCustomUDFExpression(expr);
+ } else {
+ ve = getVectorExpression(expr.getGenericUDF(),
+ expr.getChildExprs());
+ }
      } else if (exprDesc instanceof ExprNodeConstantDesc) {
        ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc);
      }
@@ -254,6 +273,21 @@ public class VectorizationContext {
      return ve;
    }

+ // Return true if this is a custom UDF or custom GenericUDF.
+ // This is for use only in the planner. It will fail in a task.
+ public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
+ String udfName = expr.getFuncText();
+ if (udfName == null) {
+ return false;
+ }
+ FunctionInfo funcInfo = FunctionRegistry.getFunctionInfo(udfName);
+ if (funcInfo == null) {
+ return false;
+ }
+ boolean isNativeFunc = funcInfo.isNative();
+ return !isNativeFunc;
+ }
+
    /**
     * Handles only the special case of unary operators on a constant.
     * @param exprDesc
@@ -474,6 +508,104 @@ public class VectorizationContext {
      throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
    }

+ /*
+ * Return vector expression for a custom (i.e. not built-in) UDF.
+ */
+ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
+ throws HiveException {
+
+ //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
+ List<ExprNodeDesc> childExprList = expr.getChildExprs();
+
+ // argument descriptors
+ VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[expr.getChildExprs().size()];
+ for (int i = 0; i < argDescs.length; i++) {
+ argDescs[i] = new VectorUDFArgDesc();
+ }
+
+ // positions of variable arguments (columns or non-constant expressions)
+ List<Integer> variableArgPositions = new ArrayList<Integer>();
+
+ // Column numbers of batch corresponding to expression result arguments
+ List<Integer> exprResultColumnNums = new ArrayList<Integer>();
+
+ // Prepare children
+ List<VectorExpression> vectorExprs = new ArrayList<VectorExpression>();
+
+ for (int i = 0; i < childExprList.size(); i++) {
+ ExprNodeDesc child = childExprList.get(i);
+ if (child instanceof ExprNodeGenericFuncDesc) {
+ VectorExpression e = getVectorExpression(child);
+ vectorExprs.add(e);
+ variableArgPositions.add(i);
+ exprResultColumnNums.add(e.getOutputColumn());
+ argDescs[i].setVariable(e.getOutputColumn());
+ } else if (child instanceof ExprNodeColumnDesc) {
+ variableArgPositions.add(i);
+ argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
+ } else if (child instanceof ExprNodeConstantDesc) {
+
+ // this is a constant
+ argDescs[i].setConstant((ExprNodeConstantDesc) child);
+ } else {
+ throw new HiveException("Unable to vectorize Custom UDF");
+ }
+ }
+
+ // Allocate the output column and record its column number.
+ int outputCol = -1;
+ String resultColVectorType;
+ String resultType = expr.getTypeInfo().getTypeName();
+ if (resultType.equalsIgnoreCase("string")) {
+ resultColVectorType = "String";
+ } else if (isIntFamily(resultType)) {
+ resultColVectorType = "Long";
+ } else if (isFloatFamily(resultType)) {
+ resultColVectorType = "Double";
+ } else if (resultType.equalsIgnoreCase("timestamp")) {
+ resultColVectorType = "Long";
+ } else {
+ throw new HiveException("Unable to vectorize due to unsupported custom UDF return type "
+ + resultType);
+ }
+ outputCol = ocm.allocateOutputColumn(resultColVectorType);
+
+ // Make vectorized operator
+ VectorExpression ve;
+ ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
+
+ // Set child expressions
+ VectorExpression[] childVEs = null;
+ if (exprResultColumnNums.size() != 0) {
+ childVEs = new VectorExpression[exprResultColumnNums.size()];
+ for (int i = 0; i < childVEs.length; i++) {
+ childVEs[i] = vectorExprs.get(i);
+ }
+ }
+ ve.setChildExpressions(childVEs);
+
+ // Free output columns if inputs have non-leaf expression trees.
+ for (Integer i : exprResultColumnNums) {
+ ocm.freeOutputColumn(i);
+ }
+ return ve;
+ }
+
+ // return true if this is any kind of float
+ public static boolean isFloatFamily(String resultType) {
+ return resultType.equalsIgnoreCase("double")
+ || resultType.equalsIgnoreCase("float");
+ }
+
+ // Return true if this data type is handled in the output vector as an integer.
+ public static boolean isIntFamily(String resultType) {
+ return resultType.equalsIgnoreCase("tinyint")
+ || resultType.equalsIgnoreCase("smallint")
+ || resultType.equalsIgnoreCase("int")
+ || resultType.equalsIgnoreCase("bigint")
+ || resultType.equalsIgnoreCase("boolean");
+ }
+
    /* Return a unary string vector expression. This is used for functions like
     * UPPER() and LOWER().
     */

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,346 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+
+/**
+ * A VectorUDFAdaptor is a vectorized expression for invoking a custom
+ * UDF on zero or more input vectors or constants which are the function arguments.
+ */
+public class VectorUDFAdaptor extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ private int outputColumn;
+ private String resultType;
+ private VectorUDFArgDesc[] argDescs;
+ private ExprNodeGenericFuncDesc expr;
+
+ private transient GenericUDF genericUDF;
+ private transient GenericUDF.DeferredObject[] deferredChildren;
+ private transient ObjectInspector outputOI;
+ private transient ObjectInspector[] childrenOIs;
+ private transient VectorExpressionWriter[] writers;
+
+ public VectorUDFAdaptor() {
+ super();
+ }
+
+ public VectorUDFAdaptor (
+ ExprNodeGenericFuncDesc expr,
+ int outputColumn,
+ String resultType,
+ VectorUDFArgDesc[] argDescs) throws HiveException {
+
+ this();
+ this.expr = expr;
+ this.outputColumn = outputColumn;
+ this.resultType = resultType;
+ this.argDescs = argDescs;
+ }
+
+ // Initialize transient fields. To be called after deserialization of other fields.
+ public void init() throws HiveException, UDFArgumentException {
+ genericUDF = expr.getGenericUDF();
+ deferredChildren = new GenericUDF.DeferredObject[expr.getChildExprs().size()];
+ childrenOIs = new ObjectInspector[expr.getChildExprs().size()];
+ writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildExprs());
+ for (int i = 0; i < childrenOIs.length; i++) {
+ childrenOIs[i] = writers[i].getObjectInspector();
+ }
+ outputOI = VectorExpressionWriterFactory.genVectorExpressionWritable(expr)
+ .getObjectInspector();
+
+ genericUDF.initialize(childrenOIs);
+
+ // Initialize constant arguments
+ for (int i = 0; i < argDescs.length; i++) {
+ if (argDescs[i].isConstant()) {
+ argDescs[i].prepareConstant();
+ }
+ }
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (genericUDF == null) {
+ try {
+ init();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ int[] sel = batch.selected;
+ int n = batch.size;
+ ColumnVector outV = batch.cols[outputColumn];
+
+ // If the output column is of type string, initialize the buffer to receive data.
+ if (outV instanceof BytesColumnVector) {
+ ((BytesColumnVector) outV).initBuffer();
+ }
+
+ if (n == 0) {
+ //Nothing to do
+ return;
+ }
+
+ batch.cols[outputColumn].noNulls = true;
+
+ /* If all input columns are repeating, just evaluate function
+ * for row 0 in the batch and set output repeating.
+ */
+ if (allInputColsRepeating(batch)) {
+ setResult(0, batch);
+ batch.cols[outputColumn].isRepeating = true;
+ return;
+ } else {
+ batch.cols[outputColumn].isRepeating = false;
+ }
+
+ if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ setResult(i, batch);
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ setResult(i, batch);
+ }
+ }
+ }
+
+ /* Return false if any variable input column is non-repeating, otherwise true.
+ * NOTE(review): varArgCount is incremented for every argument, so this returns
+ * true for all-constant arguments and false only when there are zero arguments.
+ *
+ * A possible future optimization is to set the output to isRepeating
+ * for cases of all-constant arguments for deterministic functions.
+ */
+ private boolean allInputColsRepeating(VectorizedRowBatch batch) {
+ int varArgCount = 0;
+ for (int i = 0; i < argDescs.length; i++) {
+ if (argDescs[i].isVariable() && !batch.cols[argDescs[i].getColumnNum()].isRepeating) {
+ return false;
+ }
+ varArgCount += 1;
+ }
+ if (varArgCount > 0) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /* Calculate the function result for row i of the batch and
+ * set the output column vector entry i to the result.
+ */
+ private void setResult(int i, VectorizedRowBatch b) {
+
+ // get arguments
+ for (int j = 0; j < argDescs.length; j++) {
+ deferredChildren[j] = argDescs[j].getDeferredJavaObject(i, b, j, writers);
+ }
+
+ // call function
+ Object result;
+ try {
+ result = genericUDF.evaluate(deferredChildren);
+ } catch (HiveException e) {
+
+ /* For UDFs that expect primitive types (like int instead of Integer or IntWritable),
+ * this will catch the exception that happens if they are passed a NULL value.
+ * Then the default NULL handling logic will apply, and the result will be NULL.
+ */
+ result = null;
+ }
+
+ // set output column vector entry
+ if (result == null) {
+ b.cols[outputColumn].noNulls = false;
+ b.cols[outputColumn].isNull[i] = true;
+ } else {
+ b.cols[outputColumn].isNull[i] = false;
+ setOutputCol(b.cols[outputColumn], i, result);
+ }
+ }
+
+ private void setOutputCol(ColumnVector colVec, int i, Object value) {
+
+ /* Depending on the output type, get the value, cast the result to the
+ * correct type if needed, and assign the result into the output vector.
+ */
+ if (outputOI instanceof WritableStringObjectInspector) {
+ BytesColumnVector bv = (BytesColumnVector) colVec;
+ Text t;
+ if (value instanceof String) {
+ t = new Text((String) value);
+ } else {
+ t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
+ }
+ bv.setVal(i, t.getBytes(), 0, t.getLength());
+ } else if (outputOI instanceof WritableIntObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ if (value instanceof Integer) {
+ lv.vector[i] = (Integer) value;
+ } else {
+ lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableLongObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ if (value instanceof Long) {
+ lv.vector[i] = (Long) value;
+ } else {
+ lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableDoubleObjectInspector) {
+ DoubleColumnVector dv = (DoubleColumnVector) colVec;
+ if (value instanceof Double) {
+ dv.vector[i] = (Double) value;
+ } else {
+ dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableFloatObjectInspector) {
+ DoubleColumnVector dv = (DoubleColumnVector) colVec;
+ if (value instanceof Float) {
+ dv.vector[i] = (Float) value;
+ } else {
+ dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableShortObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ if (value instanceof Short) {
+ lv.vector[i] = (Short) value;
+ } else {
+ lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableByteObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ if (value instanceof Byte) {
+ lv.vector[i] = (Byte) value;
+ } else {
+ lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
+ }
+ } else if (outputOI instanceof WritableTimestampObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ Timestamp ts;
+ if (value instanceof Timestamp) {
+ ts = (Timestamp) value;
+ } else {
+ ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
+ }
+ /* Calculate the number of nanoseconds since the epoch as a long integer. By convention
+ * that is how Timestamp values are operated on in a vector.
+ */
+ long l = ts.getTime() * 1000000 // Shift the milliseconds value over by 6 digits
+ // to scale for nanosecond precision.
+ // The milliseconds digits will by convention be all 0s.
+ + ts.getNanos() % 1000000; // Add on the remaining nanos.
+ // The % 1000000 operation removes the ms values
+ // so that the milliseconds are not counted twice.
+ lv.vector[i] = l;
+ } else if (outputOI instanceof WritableBooleanObjectInspector) {
+ LongColumnVector lv = (LongColumnVector) colVec;
+ if (value instanceof Boolean) {
+ lv.vector[i] = (Boolean) value ? 1 : 0;
+ } else {
+ lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
+ }
+ } else {
+ throw new RuntimeException("Unhandled object type " + outputOI.getTypeName());
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return resultType;
+ }
+
+ public String getResultType() {
+ return resultType;
+ }
+
+ public void setResultType(String resultType) {
+ this.resultType = resultType;
+ }
+
+ public VectorUDFArgDesc[] getArgDescs() {
+ return argDescs;
+ }
+
+ public void setArgDescs(VectorUDFArgDesc[] argDescs) {
+ this.argDescs = argDescs;
+ }
+
+ public ExprNodeGenericFuncDesc getExpr() {
+ return expr;
+ }
+
+ public void setExpr(ExprNodeGenericFuncDesc expr) {
+ this.expr = expr;
+ }
+}

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+
+/**
+ * Descriptor for function argument.
+ */
+public class VectorUDFArgDesc implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private boolean isConstant;
+ private int columnNum;
+ private transient GenericUDF.DeferredJavaObject constObjVal;
+ private ExprNodeConstantDesc constExpr;
+
+ public VectorUDFArgDesc() {
+ }
+
+ /**
+ * Set this argument to a constant value extracted from the
+ * expression tree.
+ */
+ public void setConstant(ExprNodeConstantDesc expr) {
+ isConstant = true;
+ constExpr = expr;
+ }
+
+ /* Prepare the constant for use when the function is called. To be used
+ * during initialization.
+ */
+ public void prepareConstant() {
+ PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
+ .getPrimitiveCategory();
+
+ // Convert from Java to Writable
+ Object writableValue = PrimitiveObjectInspectorFactory
+ .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
+ constExpr.getValue());
+
+ constObjVal = new GenericUDF.DeferredJavaObject(writableValue);
+ }
+
+ /**
+ * Set this argument to be a "variable" one which is to be taken from
+ * a specified column vector number i.
+ */
+ public void setVariable(int i) {
+ columnNum = i;
+ }
+
+ public boolean isConstant() {
+ return isConstant;
+ }
+
+ public boolean isVariable() {
+ return !isConstant;
+ }
+
+ public int getColumn() {
+ return columnNum;
+ }
+
+ public DeferredObject getDeferredJavaObject(int row, VectorizedRowBatch b, int argPosition,
+ VectorExpressionWriter[] writers) {
+
+ if (isConstant()) {
+ return this.constObjVal;
+ } else {
+
+ // get column
+ ColumnVector cv = b.cols[columnNum];
+
+ // write value to object that can be inspected
+ Object o;
+ try {
+ o = writers[argPosition].writeValue(cv, row);
+ return new GenericUDF.DeferredJavaObject(o);
+ } catch (HiveException e) {
+ throw new RuntimeException("Unable to get Java object from VectorizedRowBatch");
+ }
+ }
+ }
+
+ public boolean getIsConstant() {
+ return isConstant;
+ }
+
+ public void setIsConstant(boolean isConstant) {
+ this.isConstant = isConstant;
+ }
+
+ public int getColumnNum() {
+ return columnNum;
+ }
+
+ public void setColumnNum(int columnNum) {
+ this.columnNum = columnNum;
+ }
+
+ public ExprNodeConstantDesc getConstExpr() {
+ return constExpr;
+ }
+
+ public void setConstExpr(ExprNodeConstantDesc constExpr) {
+ this.constExpr = constExpr;
+ }
+}

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Sep 17 21:13:53 2013
@@ -461,7 +461,7 @@ public class Vectorizer implements Physi
      }
      if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
- boolean r = validateGenericUdf(d.getGenericUDF());
+ boolean r = validateGenericUdf(d);
        if (!r) {
          return false;
        }
@@ -474,7 +474,11 @@ public class Vectorizer implements Physi
      return true;
    }

- private boolean validateGenericUdf(GenericUDF genericUDF) {
+ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
+ if (VectorizationContext.isCustomUDF(genericUDFExpr)) {
+ return true;
+ }
+ GenericUDF genericUDF = genericUDFExpr.getGenericUDF();
      if (genericUDF instanceof GenericUDFBridge) {
        Class<? extends UDF> udf = ((GenericUDFBridge) genericUDF).getUdfClass();
        return supportedGenericUDFs.contains(udf);

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Tue Sep 17 21:13:53 2013
@@ -659,7 +659,7 @@ public final class TypeCheckProcFactory

        List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length);
        childrenList.addAll(Arrays.asList(children));
- return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList);
+ return ExprNodeGenericFuncDesc.newInstance(genericUDF, udfName, childrenList);
      }

      static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
@@ -724,7 +724,7 @@ public final class TypeCheckProcFactory
            // Calculate TypeInfo
            TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
            desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
- .getGenericUDFForIndex(), children);
+ .getGenericUDFForIndex(), funcText, children);
          } else if (myt.getCategory() == Category.MAP) {
            // Only allow constant map key for now
            if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
@@ -740,7 +740,7 @@ public final class TypeCheckProcFactory
            // Calculate TypeInfo
            TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
            desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
- .getGenericUDFForIndex(), children);
+ .getGenericUDFForIndex(), funcText, children);
          } else {
            throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr,
                myt.getTypeName()));
@@ -861,7 +861,7 @@ public final class TypeCheckProcFactory
            }
          }

- desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children);
+ desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), funcText, children);
        }
        // UDFOPPositive is a no-op.
        // However, we still create it, and then remove it here, to make sure we

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1524226&r1=1524225&r2=1524226&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Tue Sep 17 21:13:53 2013
@@ -62,6 +62,7 @@ public class ExprNodeGenericFuncDesc ext
     */
    private GenericUDF genericUDF;
    private List<ExprNodeDesc> childExprs;
+ private transient String funcText;
    /**
     * This class uses a writableObjectInspector rather than a TypeInfo to store
     * the canonical type information for this NodeDesc.
@@ -73,13 +74,19 @@ public class ExprNodeGenericFuncDesc ext
    public ExprNodeGenericFuncDesc() {
    }

+ /* If the function has an explicit name like func(args) then call a
+ * constructor that explicitly provides the function name in the
+ * funcText argument.
+ */
    public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+ String funcText,
        List<ExprNodeDesc> children) {
      this(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
- genericUDF, children);
+ genericUDF, funcText, children);
    }

    public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+ String funcText,
        List<ExprNodeDesc> children) {
      super(TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
      this.writableObjectInspector =
@@ -87,6 +94,18 @@ public class ExprNodeGenericFuncDesc ext
      assert (genericUDF != null);
      this.genericUDF = genericUDF;
      this.childExprs = children;
+ this.funcText = funcText;
+ }
+
+ // Backward-compatibility interfaces for functions without a user-visible name.
+ public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+ List<ExprNodeDesc> children) {
+ this(typeInfo, genericUDF, null, children);
+ }
+
+ public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+ List<ExprNodeDesc> children) {
+ this(oi, genericUDF, null, children);
    }

    @Override
@@ -165,17 +184,20 @@ public class ExprNodeGenericFuncDesc ext
        cloneCh.add(ch.clone());
      }
      ExprNodeGenericFuncDesc clone = new ExprNodeGenericFuncDesc(typeInfo,
- FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh);
+ FunctionRegistry.cloneGenericUDF(genericUDF), funcText, cloneCh);
      return clone;
    }

    /**
- * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the
- * children parameters.
+ * Create an ExprNodeGenericFuncDesc based on the genericUDFClass and the
+ * children parameters. If the function has an explicit name, the
+ * newInstance method should be passed the function name in the funcText
+ * argument.
     *
     * @throws UDFArgumentException
     */
    public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+ String funcText,
        List<ExprNodeDesc> children) throws UDFArgumentException {
      ObjectInspector[] childrenOIs = new ObjectInspector[children.size()];
      for (int i = 0; i < childrenOIs.length; i++) {
@@ -232,7 +254,15 @@ public class ExprNodeGenericFuncDesc ext
        }
      }

- return new ExprNodeGenericFuncDesc(oi, genericUDF, children);
+ return new ExprNodeGenericFuncDesc(oi, genericUDF, funcText, children);
+ }
+
+ /* Backward-compatibility interface for the case where there is no explicit
+ * name for the function.
+ */
+ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF,
+ List<ExprNodeDesc> children) throws UDFArgumentException {
+ return newInstance(genericUDF, null, children);
    }

    @Override
@@ -285,4 +315,8 @@ public class ExprNodeGenericFuncDesc ext
    public void setSortedExpr(boolean isSortedExpr) {
      this.isSortedExpr = isSortedExpr;
    }
+
+ public String getFuncText() {
+ return this.funcText;
+ }
  }

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
+import org.apache.hadoop.hive.ql.exec.vector.udf.generic.GenericUDFIsNull;
+import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF;
+import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.junit.Test;
+
+/*
+ * Test the vectorized UDF adaptor to verify that custom legacy and generic
+ * UDFs can be run in vectorized mode.
+ */
+
+public class TestVectorUDFAdaptor {
+
+ static byte[] blue = null;
+ static byte[] red = null;
+
+ static {
+ try {
+ blue = "blue".getBytes("UTF-8");
+ red = "red".getBytes("UTF-8");
+ } catch (Exception e) {
+ ; // do nothing
+ }
+ }
+
+ @Test
+ public void testLongUDF() {
+
+ // create a syntax tree for a simple function call "longudf(col0)"
+ ExprNodeGenericFuncDesc funcDesc;
+ TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
+ GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false,
+ LongUDF.class.getName());
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ ExprNodeColumnDesc colDesc
+ = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
+ children.add(colDesc);
+ VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
+ argDescs[0] = new VectorUDFArgDesc();
+ argDescs[0].setVariable(0);
+ funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge,
+ genericUDFBridge.getUdfName(), children);
+
+ // create the adaptor for this function call to work in vector mode
+ VectorUDFAdaptor vudf = null;
+ try {
+ vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
+ } catch (HiveException e) {
+
+ // We should never get here.
+ assertTrue(false);
+ }
+
+ VectorizedRowBatch b = getBatchLongInLongOut();
+ vudf.evaluate(b);
+
+ // verify output
+ LongColumnVector out = (LongColumnVector) b.cols[1];
+ assertEquals(1000, out.vector[0]);
+ assertEquals(1001, out.vector[1]);
+ assertEquals(1002, out.vector[2]);
+ assertTrue(out.noNulls);
+ assertFalse(out.isRepeating);
+
+ // with nulls
+ b = getBatchLongInLongOut();
+ out = (LongColumnVector) b.cols[1];
+ b.cols[0].noNulls = false;
+ vudf.evaluate(b);
+ assertFalse(out.noNulls);
+ assertEquals(1000, out.vector[0]);
+ assertEquals(1001, out.vector[1]);
+ assertTrue(out.isNull[2]);
+ assertFalse(out.isRepeating);
+
+ // with repeating
+ b = getBatchLongInLongOut();
+ out = (LongColumnVector) b.cols[1];
+ b.cols[0].isRepeating = true;
+ vudf.evaluate(b);
+
+ // The implementation may or may not set output it isRepeting.
+ // That is implementation-defined.
+ assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000
+ || !b.cols[1].isRepeating && out.vector[2] == 1000);
+ assertEquals(3, b.size);
+ }
+
+ @Test
+ public void testMultiArgumentUDF() {
+
+ // create a syntax tree for a function call "testudf(col0, col1, col2)"
+ ExprNodeGenericFuncDesc funcDesc;
+ TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
+ TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
+ TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
+ GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false,
+ ConcatTextLongDoubleUDF.class.getName());
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
+ children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
+ children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));
+
+ VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
+ for (int i = 0; i < 3; i++) {
+ argDescs[i] = new VectorUDFArgDesc();
+ argDescs[i].setVariable(i);
+ }
+ funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge,
+ genericUDFBridge.getUdfName(), children);
+
+ // create the adaptor for this function call to work in vector mode
+ VectorUDFAdaptor vudf = null;
+ try {
+ vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
+ } catch (HiveException e) {
+
+ // We should never get here.
+ assertTrue(false);
+ throw new RuntimeException(e);
+ }
+
+ // with no nulls
+ VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
+ vudf.evaluate(b);
+ byte[] result = null;
+ byte[] result2 = null;
+ try {
+ result = "red:1:1.0".getBytes("UTF-8");
+ result2 = "blue:0:0.0".getBytes("UTF-8");
+ } catch (Exception e) {
+ ;
+ }
+ BytesColumnVector out = (BytesColumnVector) b.cols[3];
+ int cmp = StringExpr.compare(result, 0, result.length, out.vector[1],
+ out.start[1], out.length[1]);
+ assertEquals(0, cmp);
+ assertTrue(out.noNulls);
+
+ // with nulls
+ b = getBatchStrDblLongWithStrOut();
+ b.cols[1].noNulls = false;
+ vudf.evaluate(b);
+ out = (BytesColumnVector) b.cols[3];
+ assertFalse(out.noNulls);
+ assertTrue(out.isNull[1]);
+
+ // with all input columns repeating
+ b = getBatchStrDblLongWithStrOut();
+ b.cols[0].isRepeating = true;
+ b.cols[1].isRepeating = true;
+ b.cols[2].isRepeating = true;
+ vudf.evaluate(b);
+
+ out = (BytesColumnVector) b.cols[3];
+ assertTrue(out.isRepeating);
+ cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0],
+ out.start[0], out.length[0]);
+ assertEquals(0, cmp);
+ assertTrue(out.noNulls);
+ }
+
+ private VectorizedRowBatch getBatchLongInLongOut() {
+ VectorizedRowBatch b = new VectorizedRowBatch(2);
+ LongColumnVector in = new LongColumnVector();
+ LongColumnVector out = new LongColumnVector();
+ b.cols[0] = in;
+ b.cols[1] = out;
+ in.vector[0] = 0;
+ in.vector[1] = 1;
+ in.vector[2] = 2;
+ in.isNull[2] = true;
+ in.noNulls = true;
+ b.size = 3;
+ return b;
+ }
+
+ private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
+ VectorizedRowBatch b = new VectorizedRowBatch(4);
+ BytesColumnVector strCol = new BytesColumnVector();
+ LongColumnVector longCol = new LongColumnVector();
+ DoubleColumnVector dblCol = new DoubleColumnVector();
+ BytesColumnVector outCol = new BytesColumnVector();
+ b.cols[0] = strCol;
+ b.cols[1] = longCol;
+ b.cols[2] = dblCol;
+ b.cols[3] = outCol;
+
+ strCol.initBuffer();
+ strCol.setVal(0, blue, 0, blue.length);
+ strCol.setVal(1, red, 0, red.length);
+ longCol.vector[0] = 0;
+ longCol.vector[1] = 1;
+ dblCol.vector[0] = 0.0;
+ dblCol.vector[1] = 1.0;
+
+ // set one null value for possible later use
+ longCol.isNull[1] = true;
+
+ // but have no nulls initially
+ longCol.noNulls = true;
+ strCol.noNulls = true;
+ dblCol.noNulls = true;
+ outCol.initBuffer();
+ b.size = 2;
+ return b;
+ }
+
+
+ // test the UDF adaptor for a generic UDF (as opposed to a legacy UDF)
+ @Test
+ public void testGenericUDF() {
+
+ // create a syntax tree for a function call 'myisnull(col0, "UNKNOWN")'
+ ExprNodeGenericFuncDesc funcDesc;
+ GenericUDF genericUDF = new GenericUDFIsNull();
+ TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
+
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
+ children.add(new ExprNodeConstantDesc(typeInfoStr, "UNKNOWN"));
+
+ VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[2];
+ for (int i = 0; i < 2; i++) {
+ argDescs[i] = new VectorUDFArgDesc();
+ }
+ argDescs[0].setVariable(0);
+ argDescs[1].setConstant((ExprNodeConstantDesc) children.get(1));
+ funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDF, "myisnull", children);
+
+ // create the adaptor for this function call to work in vector mode
+ VectorUDFAdaptor vudf = null;
+ try {
+ vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
+ } catch (HiveException e) {
+
+ // We should never get here.
+ assertTrue(false);
+ }
+
+ VectorizedRowBatch b;
+
+ byte[] red = null;
+ byte[] unknown = null;
+ try {
+ red = "red".getBytes("UTF-8");
+ unknown = "UNKNOWN".getBytes("UTF-8");
+ } catch (Exception e) {
+ ;
+ }
+ BytesColumnVector out;
+
+ // with nulls
+ b = getBatchStrDblLongWithStrOut();
+ b.cols[0].noNulls = false;
+ b.cols[0].isNull[0] = true; // set 1st entry to null
+ vudf.evaluate(b);
+ out = (BytesColumnVector) b.cols[3];
+
+ // verify outputs
+ int cmp = StringExpr.compare(red, 0, red.length,
+ out.vector[1], out.start[1], out.length[1]);
+ assertEquals(0, cmp);
+ cmp = StringExpr.compare(unknown, 0, unknown.length,
+ out.vector[0], out.start[0], out.length[0]);
+ assertEquals(0, cmp);
+
+ // output entry should not be null for null input for this particular generic UDF
+ assertTrue(out.noNulls || !out.isNull[0]);
+ }
+}

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/generic/GenericUDFIsNull.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+@Description(name = "myisnull",
+value = "_FUNC_(value,default_value) - Returns default value if value is null else returns value",
+extended = "Example:\n"
++ " > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n" + " bla")
+/*
+ * This is a copy of GenericUDFNvl, which is built-in. We'll make it a generic
+ * custom UDF for test purposes.
+ */
+public class GenericUDFIsNull extends GenericUDF{
+ private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+ private transient ObjectInspector[] argumentOIs;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+ argumentOIs = arguments;
+ if (arguments.length != 2) {
+ throw new UDFArgumentLengthException(
+ "The operator 'MYISNULL' accepts 2 arguments.");
+ }
+ returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+ if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
+ .update(arguments[1]))) {
+ throw new UDFArgumentTypeException(2,
+ "The first and seconds arguments of function MYISNULL should have the same type, "
+ + "but they are different: \"" + arguments[0].getTypeName()
+ + "\" and \"" + arguments[1].getTypeName() + "\"");
+ }
+ return returnOIResolver.get();
+ }
+
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
+ argumentOIs[0]);
+ if (retVal == null ){
+ retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
+ argumentOIs[1]);
+ }
+ return retVal;
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("if ");
+ sb.append(children[0]);
+ sb.append(" is null ");
+ sb.append("returns");
+ sb.append(children[1]);
+ return sb.toString() ;
+ }
+
+}

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/ConcatTextLongDoubleUDF.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.legacy;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.io.Text;
+
+@Description(
+ name = "testudf",
+ value = "_FUNC_(str) - combines arguments to output string",
+ extended = "Example:\n" +
+ " > SELECT testudf(name, dob, salary) FROM employee;\n" +
+ " Jack"
+ )
+
+/* This is a test function that takes three different kinds
+ * of arguments, for use to verify vectorized UDF invocation.
+ */
+public class ConcatTextLongDoubleUDF extends UDF {
+ public Text evaluate(Text s, Long i, Double d) {
+
+ if (s == null
+ || i == null
+ || d == null) {
+ return null;
+ }
+ StringBuilder sb = new StringBuilder();
+ sb.append(s.toString());
+ sb.append(":");
+ sb.append(i);
+ sb.append(":");
+ sb.append(d);
+ return new Text(sb.toString());
+ }
+}

Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java?rev=1524226&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/LongUDF.java Tue Sep 17 21:13:53 2013
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.udf.legacy;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.LongWritable;
+
+/* A UDF like one a user would create, implementing the UDF interface.
+ * This is to be used to test the vectorized UDF adaptor for legacy-style UDFs.
+ */
+
+@Description(
+ name = "longudf",
+ value = "_FUNC_(arg) - returns arg + 1000",
+ extended = "Example:\n" +
+ " > SELECT longudf(eno) FROM employee;\n"
+ )
+
+public class LongUDF extends UDF {
+ public LongWritable evaluate(LongWritable i) {
+ if (i == null) {
+ return null;
+ }
+ return new LongWritable(i.get() + 1000);
+ }
+}
\ No newline at end of file

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedSep 17, '13 at 9:14p
activeSep 17, '13 at 9:14p
posts1
users1
websitehive.apache.org

1 user in discussion

Hashutosh: 1 post

People

Translate

site design / logo © 2021 Grokbase