FAQ
Repository: hive
Updated Branches:
   refs/heads/branch-1 bd44d45cd -> 8d26933d4


Fix build break "HIVE-11448: Support vectorization of Multi-OR and Multi-AND (Matt McCline, reviewed by Gopal V)"


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8d26933d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8d26933d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8d26933d

Branch: refs/heads/branch-1
Commit: 8d26933d401fc99ea6af382f0e9aadaae9074521
Parents: bd44d45
Author: Matt McCline <mmccline@hortonworks.com>
Authored: Sun Aug 9 19:23:18 2015 -0700
Committer: Matt McCline <mmccline@hortonworks.com>
Committed: Sun Aug 9 19:23:18 2015 -0700

----------------------------------------------------------------------
  .../hive/ql/exec/vector/ColumnVector.java | 10 +
  .../ql/exec/vector/VectorizationContext.java | 97 ++++++-
  .../vector/expressions/FilterExprAndExpr.java | 8 +-
  .../vector/expressions/FilterExprOrExpr.java | 140 ++++++++-
  .../exec/vector/TestVectorizationContext.java | 78 +++++
  .../TestVectorLogicalExpressions.java | 282 +++++++++++++++++++
  6 files changed, 595 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 6b95360..6654166 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -35,6 +35,16 @@ import org.apache.hadoop.io.Writable;
  public abstract class ColumnVector {

    /*
+ * The current kinds of column vectors.
+ */
+ public static enum Type {
+ LONG,
+ DOUBLE,
+ BYTES,
+ DECIMAL
+ }
+
+ /*
     * If hasNulls is true, then this array contains true if the value
     * is null, otherwise false. The array is always allocated, so a batch can be re-used
     * later and nulls added.

http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 61d2972..06b32f3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
  import org.apache.hadoop.hive.ql.exec.FunctionInfo;
  import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
  import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
  import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
  import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
@@ -107,12 +108,14 @@ import org.apache.hadoop.hive.ql.udf.generic.*;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
  import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
  import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  import org.apache.hadoop.util.StringUtils;
  import org.apache.hive.common.util.DateUtils;

@@ -955,9 +958,43 @@ public class VectorizationContext {
      return expr;
    }

- private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<ExprNodeDesc> childExpr, Mode mode,
+ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf,
+ Class<?> udfClass, List<ExprNodeDesc> childExpr, Mode mode,
        TypeInfo returnType) throws HiveException {
+
      int numChildren = (childExpr == null) ? 0 : childExpr.size();
+
+ if (numChildren > 2 && genericeUdf != null && mode == Mode.FILTER &&
+ ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) {
+
+ // Special case handling for Multi-OR and Multi-AND.
+
+ for (int i = 0; i < numChildren; i++) {
+ ExprNodeDesc child = childExpr.get(i);
+ String childTypeString = child.getTypeString();
+ if (childTypeString == null) {
+ throw new HiveException("Null child type name string");
+ }
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
+ Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+ if (columnVectorType != ColumnVector.Type.LONG){
+ return null;
+ }
+ if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ }
+ Class<?> vclass;
+ if (genericeUdf instanceof GenericUDFOPOr) {
+ vclass = FilterExprOrExpr.class;
+ } else if (genericeUdf instanceof GenericUDFOPAnd) {
+ vclass = FilterExprAndExpr.class;
+ } else {
+ throw new RuntimeException("Unexpected multi-child UDF");
+ }
+ Mode childrenMode = getChildrenMode(mode, udfClass);
+ return createVectorExpression(vclass, childExpr, childrenMode, returnType);
+ }
      if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
        return null;
      }
@@ -984,14 +1021,14 @@ public class VectorizationContext {
        }
      }
      VectorExpressionDescriptor.Descriptor descriptor = builder.build();
- Class<?> vclass = this.vMap.getVectorExpressionClass(udf, descriptor);
+ Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor);
      if (vclass == null) {
        if (LOG.isDebugEnabled()) {
- LOG.debug("No vector udf found for "+udf.getSimpleName() + ", descriptor: "+descriptor);
+ LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor);
        }
        return null;
      }
- Mode childrenMode = getChildrenMode(mode, udf);
+ Mode childrenMode = getChildrenMode(mode, udfClass);
      return createVectorExpression(vclass, childExpr, childrenMode, returnType);
    }

@@ -1156,11 +1193,14 @@ public class VectorizationContext {
      }
      // Now do a general lookup
      Class<?> udfClass = udf.getClass();
+ boolean isSubstituted = false;
      if (udf instanceof GenericUDFBridge) {
        udfClass = ((GenericUDFBridge) udf).getUdfClass();
+ isSubstituted = true;
      }

- VectorExpression ve = getVectorExpressionForUdf(udfClass, castedChildren, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
+ udfClass, castedChildren, mode, returnType);

      if (ve == null) {
        throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -1171,7 +1211,7 @@ public class VectorizationContext {

    private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf,
        List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
- VectorExpression ve = getVectorExpressionForUdf(udf.getClass(), childExpr, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType);

      // Replace with the milliseconds conversion
      if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestampViaLongToLong) {
@@ -2044,6 +2084,51 @@ public class VectorizationContext {
      }
    }

+ public static String mapTypeNameSynonyms(String typeName) {
+ typeName = typeName.toLowerCase();
+ if (typeName.equals("long")) {
+ return "bigint";
+ } else if (typeName.equals("string_family")) {
+ return "string";
+ } else {
+ return typeName;
+ }
+ }
+
+ public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException {
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case DATE:
+ case TIMESTAMP:
+ case INTERVAL_YEAR_MONTH:
+ case INTERVAL_DAY_TIME:
+ return ColumnVector.Type.LONG;
+
+ case FLOAT:
+ case DOUBLE:
+ return ColumnVector.Type.DOUBLE;
+
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ case BINARY:
+ return ColumnVector.Type.BYTES;
+
+ case DECIMAL:
+ return ColumnVector.Type.DECIMAL;
+
+ default:
+ throw new HiveException("Unexpected primitive type category " + primitiveCategory);
+ }
+ }
+
    // TODO: When we support vectorized STRUCTs and can handle more in the reduce-side (MERGEPARTIAL):
    // TODO: Write reduce-side versions of AVG. Currently, only map-side (HASH) versions are in table.
    // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively

http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
index 1f14e42..41e3b0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -35,7 +35,9 @@ public class FilterExprAndExpr extends VectorExpression {
    @Override
    public void evaluate(VectorizedRowBatch batch) {
      childExpressions[0].evaluate(batch);
- childExpressions[1].evaluate(batch);
+ for (int childIndex = 1; childIndex < childExpressions.length; childIndex++) {
+ childExpressions[childIndex].evaluate(batch);
+ }
    }

    @Override
@@ -50,6 +52,10 @@ public class FilterExprAndExpr extends VectorExpression {

    @Override
    public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // IMPORTANT NOTE: For Multi-AND, the VectorizationContext class will catch cases with 3 or
+ // more parameters...
+
      return (new VectorExpressionDescriptor.Builder())
          .setMode(
              VectorExpressionDescriptor.Mode.FILTER)

http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
index f14307e..dc5139d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -18,6 +18,8 @@

  package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import com.google.common.base.Preconditions;
+
  import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

@@ -28,12 +30,88 @@ public class FilterExprOrExpr extends VectorExpression {
    private static final long serialVersionUID = 1L;
    private transient final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
    private transient int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private transient int[] unselectedCopy = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private transient int[] difference = new int[VectorizedRowBatch.DEFAULT_SIZE];
    private transient final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];

    public FilterExprOrExpr() {
      super();
    }

+ /**
+ * Remove (subtract) members from an array and produce the results into
+ * a difference array.
+
+ * @param all
+ * The selected array containing all members.
+ * @param allSize
+ * The size of all.
+ * @param remove
+ * The indices to remove. They must all be present in input selected array.
+ * @param removeSize
+ * The size of remove.
+ * @param difference
+ * The resulting difference -- the all array indices not in the
+ * remove array.
+ * @return
+ * The resulting size of the difference array.
+ */
+ private int subtract(int[] all, int allSize,
+ int[] remove, int removeSize, int[] difference) {
+
+ // UNDONE: Copied from VectorMapJoinOuterGenerateResultOperator.
+
+ Preconditions.checkState((all != remove) && (remove != difference) && (difference != all));
+
+ // Comment out these checks when we are happy..
+ if (!verifyMonotonicallyIncreasing(all, allSize)) {
+ throw new RuntimeException("all is not in sort order and unique");
+ }
+ if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+ throw new RuntimeException("remove is not in sort order and unique");
+ }
+
+ int differenceCount = 0;
+
+ // Determine which rows are left.
+ int removeIndex = 0;
+ for (int i = 0; i < allSize; i++) {
+ int candidateIndex = all[i];
+ if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+ removeIndex++;
+ } else {
+ difference[differenceCount++] = candidateIndex;
+ }
+ }
+
+ if (removeIndex != removeSize) {
+ throw new RuntimeException("Not all batch indices removed");
+ }
+
+ if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
+ throw new RuntimeException("difference is not in sort order and unique");
+ }
+
+ return differenceCount;
+ }
+
+ public boolean verifyMonotonicallyIncreasing(int[] selected, int size) {
+
+ if (size == 0) {
+ return true;
+ }
+ int prevBatchIndex = selected[0];
+
+ for (int i = 1; i < size; i++) {
+ int batchIndex = selected[i];
+ if (batchIndex <= prevBatchIndex) {
+ return false;
+ }
+ prevBatchIndex = batchIndex;
+ }
+ return true;
+ }
+
    @Override
    public void evaluate(VectorizedRowBatch batch) {
      int n = batch.size;
@@ -42,7 +120,6 @@ public class FilterExprOrExpr extends VectorExpression {
      }

      VectorExpression childExpr1 = this.childExpressions[0];
- VectorExpression childExpr2 = this.childExpressions[1];

      boolean prevSelectInUse = batch.selectedInUse;

@@ -80,17 +157,55 @@ public class FilterExprOrExpr extends VectorExpression {
        }
      }

- // Evaluate second child expression over unselected ones only.
+ int newSize = sizeAfterFirstChild;
+
      batch.selected = unselected;
      batch.size = unselectedSize;

- childExpr2.evaluate(batch);
-
- // Merge the result of last evaluate to previous evaluate.
- int newSize = batch.size + sizeAfterFirstChild;
- for (int i = 0; i < batch.size; i++) {
- tmp[batch.selected[i]] = 1;
+ if (unselectedSize > 0) {
+
+ // Evaluate subsequent child expression over unselected ones only.
+
+ final int childrenCount = this.childExpressions.length;
+ int childIndex = 1;
+ while (true) {
+
+ boolean isLastChild = (childIndex + 1 >= childrenCount);
+
+ // When we have yet another child beyond the current one... save unselected.
+ if (!isLastChild) {
+ System.arraycopy(batch.selected, 0, unselectedCopy, 0, unselectedSize);
+ }
+
+ VectorExpression childExpr = this.childExpressions[childIndex];
+
+ childExpr.evaluate(batch);
+
+ // Merge the result of last evaluate to previous evaluate.
+ newSize += batch.size;
+ for (int i = 0; i < batch.size; i++) {
+ tmp[batch.selected[i]] = 1;
+ }
+
+ if (isLastChild) {
+ break;
+ }
+
+ unselectedSize = subtract(unselectedCopy, unselectedSize, batch.selected, batch.size,
+ difference);
+ if (unselectedSize == 0) {
+ break;
+ }
+ System.arraycopy(difference, 0, batch.selected, 0, unselectedSize);
+ batch.size = unselectedSize;
+
+ childIndex++;
+ }
      }
+
+ // Important: Restore the batch's selected array.
+ batch.selected = selectedAfterFirstChild;
+
      int k = 0;
      for (int j = 0; j < n; j++) {
        int i = initialSelected[j];
@@ -99,16 +214,11 @@ public class FilterExprOrExpr extends VectorExpression {
        }
      }

-
      batch.size = newSize;
      if (newSize == n) {
        // Filter didn't do anything
        batch.selectedInUse = prevSelectInUse;
      }
-
- // unselected array is taken away by the row batch
- // so take the row batch's original one.
- unselected = selectedAfterFirstChild;
    }

    @Override
@@ -123,6 +233,10 @@ public class FilterExprOrExpr extends VectorExpression {

    @Override
    public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // IMPORTANT NOTE: For Multi-OR, the VectorizationContext class will catch cases with 3 or
+ // more parameters...
+
      return (new VectorExpressionDescriptor.Builder())
          .setMode(
              VectorExpressionDescriptor.Mode.FILTER)

http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 1a77033..98a8c3e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -503,6 +503,84 @@ public class TestVectorizationContext {
    }

    @Test
+ public void testVectorizeFilterMultiAndOrExpression() throws HiveException {
+ ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+ ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+
+ GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+ ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+ greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ greaterExprDesc.setGenericUDF(udf);
+ List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+ children1.add(col1Expr);
+ children1.add(constDesc);
+ greaterExprDesc.setChildren(children1);
+
+ ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Float.class, "col2", "table", false);
+ ExprNodeConstantDesc const2Desc = new ExprNodeConstantDesc(new Float(1.0));
+
+ GenericUDFOPLessThan udf2 = new GenericUDFOPLessThan();
+ ExprNodeGenericFuncDesc lessExprDesc = new ExprNodeGenericFuncDesc();
+ lessExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ lessExprDesc.setGenericUDF(udf2);
+ List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>(2);
+ children2.add(col2Expr);
+ children2.add(const2Desc);
+ lessExprDesc.setChildren(children2);
+
+ ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Integer.class, "col3", "table", false);
+ ExprNodeConstantDesc const3Desc = new ExprNodeConstantDesc(new Integer(10));
+
+ GenericUDFOPGreaterThan udf3 = new GenericUDFOPGreaterThan();
+ ExprNodeGenericFuncDesc greaterExprDesc3 = new ExprNodeGenericFuncDesc();
+ greaterExprDesc3.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ greaterExprDesc3.setGenericUDF(udf3);
+ List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
+ children3.add(col3Expr);
+ children3.add(const3Desc);
+ greaterExprDesc3.setChildren(children3);
+
+ GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
+ ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
+ andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ andExprDesc.setGenericUDF(andUdf);
+ List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
+ children4.add(greaterExprDesc);
+ children4.add(lessExprDesc);
+ children4.add(greaterExprDesc3);
+ andExprDesc.setChildren(children4);
+
+ List<String> columns = new ArrayList<String>();
+ columns.add("col0");
+ columns.add("col1");
+ columns.add("col2");
+ columns.add("col3");
+ VectorizationContext vc = new VectorizationContext("name", columns);
+
+ VectorExpression ve = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
+
+ assertEquals(ve.getClass(), FilterExprAndExpr.class);
+ assertEquals(ve.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+ assertEquals(ve.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class);
+ assertEquals(ve.getChildExpressions()[2].getClass(), FilterLongColGreaterLongScalar.class);
+
+ GenericUDFOPOr orUdf = new GenericUDFOPOr();
+ ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
+ orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ orExprDesc.setGenericUDF(orUdf);
+ List<ExprNodeDesc> children5 = new ArrayList<ExprNodeDesc>(2);
+ children5.add(greaterExprDesc);
+ children5.add(lessExprDesc);
+ children5.add(greaterExprDesc3);
+ orExprDesc.setChildren(children5);
+ VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertEquals(veOr.getClass(), FilterExprOrExpr.class);
+ assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+ assertEquals(veOr.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class);
+ assertEquals(ve.getChildExpressions()[2].getClass(), FilterLongColGreaterLongScalar.class);
+ }
+
+ @Test
    public void testVectorizeAndOrProjectionExpression() throws HiveException {
      ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
      ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));

http://git-wip-us.apache.org/repos/asf/hive/blob/8d26933d/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
index 60e20a3..7d54a9c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
@@ -18,11 +18,14 @@

  package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import static org.junit.Assert.assertFalse;
  import static org.junit.Assert.assertEquals;

+
  import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  import org.junit.Assert;
  import org.junit.Test;
@@ -399,6 +402,157 @@ public class TestVectorLogicalExpressions {
      Assert.assertEquals(initialSize, batch.size);
    }

+ // A do nothing vectorized expression that passes all rows through.
+ public class SelectColumnAll extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1;
+
+ public SelectColumnAll(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnAll() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ // Do nothing.
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+ }
+
+ // A vectorized expression that we don't expect will be called due to short-circuit evaluation.
+ public class SelectColumnNotExpected extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1;
+
+ public SelectColumnNotExpected(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnNotExpected() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ assertFalse(true);
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+ }
+
+ // A vectorized expression that selects no rows.
+ public class SelectColumnNothing extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1;
+
+ public SelectColumnNothing(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnNothing() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ batch.size = 0;
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+ }
+
+ // A vectorized expression that selects exactly one row: the given batch index.
+ public class SelectColumnOne extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1;
+ private int batchIndex;
+
+ public SelectColumnOne(int colNum1, int batchIndex) {
+ this();
+ this.colNum1 = colNum1;
+ this.batchIndex = batchIndex;
+ }
+
+ public SelectColumnOne() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ batch.selected[0] = batchIndex;
+ batch.size = 1;
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+ }
+
    @Test
    public void testFilterExprOrExpr() {
      VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
@@ -441,6 +595,134 @@ public class TestVectorLogicalExpressions {
    }

    @Test
+ public void testFilterExprMultiOrExpr() {
+
+ // Select all with the first expression and expect the other 2 children to not be invoked.
+
+ VectorizedRowBatch batch1a = getBatchThreeBooleanCols();
+
+ SelectColumnAll expr1a = new SelectColumnAll(0);
+ SelectColumnNotExpected expr2a = new SelectColumnNotExpected(1);
+ SelectColumnNotExpected expr3a = new SelectColumnNotExpected(1);
+
+ FilterExprOrExpr orExpr = new FilterExprOrExpr();
+ orExpr.setChildExpressions(new VectorExpression[] {expr1a, expr2a, expr3a});
+
+ orExpr.evaluate(batch1a);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1a.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1a.selected[i]);
+ }
+
+ // Select all with the is null and is not null as 2 child expressions, and then
+ // expect the 3rd child to not be invoked.
+
+ VectorizedRowBatch batch1b = getBatchThreeBooleanCols();
+
+ SelectColumnIsNotNull expr1b = new SelectColumnIsNotNull(0);
+ SelectColumnIsNull expr2b = new SelectColumnIsNull(0);
+ SelectColumnNotExpected expr3b = new SelectColumnNotExpected(0);
+
+ FilterExprOrExpr orExpr2 = new FilterExprOrExpr();
+ orExpr2.setChildExpressions(new VectorExpression[] {expr1b, expr2b, expr3b});
+
+ orExpr2.evaluate(batch1b);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1b.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1b.selected[i]);
+ }
+
+ // Select all using an is-not-null 1st child, none as 2nd child, and is-null as 3rd, and then
+ // expect the 4th child to not be invoked.
+
+ VectorizedRowBatch batch1c = getBatchThreeBooleanCols();
+
+ SelectColumnIsNotNull expr1c = new SelectColumnIsNotNull(0);
+ SelectColumnNothing expr2c = new SelectColumnNothing(0);
+ SelectColumnIsNull expr3c = new SelectColumnIsNull(0);
+ SelectColumnNotExpected expr4c = new SelectColumnNotExpected(0);
+
+ FilterExprOrExpr orExpr3 = new FilterExprOrExpr();
+ orExpr3.setChildExpressions(new VectorExpression[] {expr1c, expr2c, expr3c, expr4c});
+
+ orExpr3.evaluate(batch1c);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1c.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1c.selected[i]);
+ }
+
+ // Select the true fields with the 1st child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1d = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1d = new SelectColumnIsTrue(0);
+ SelectColumnNothing expr2d = new SelectColumnNothing(0);
+ SelectColumnNothing expr3d = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr4 = new FilterExprOrExpr();
+ orExpr4.setChildExpressions(new VectorExpression[] {expr1d, expr3d, expr3d});
+
+ orExpr4.evaluate(batch1d);
+
+ int[] expected4 = {2,3,7};
+ assertEquals(expected4.length, batch1d.size);
+ for (int i = 0; i < expected4.length; i++) {
+ assertEquals(expected4[i], batch1d.selected[i]);
+ }
+
+
+ // Select none in 1st child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1e = getBatchThreeBooleanCols();
+
+ SelectColumnNothing expr1e = new SelectColumnNothing(0);
+ SelectColumnNothing expr2e = new SelectColumnNothing(0);
+ SelectColumnNothing expr3e = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr5 = new FilterExprOrExpr();
+ orExpr5.setChildExpressions(new VectorExpression[] {expr1e, expr2e, expr3e});
+
+ orExpr5.evaluate(batch1e);
+
+ assertEquals(0, batch1e.size);
+
+ // Select one in 1st child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1f = getBatchThreeBooleanCols();
+
+ SelectColumnOne expr1f = new SelectColumnOne(0, 4);
+ SelectColumnNothing expr2f = new SelectColumnNothing(0);
+ SelectColumnNothing expr3f = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr6 = new FilterExprOrExpr();
+ orExpr6.setChildExpressions(new VectorExpression[] {expr1f, expr2f, expr3f});
+
+ orExpr6.evaluate(batch1f);
+
+ assertEquals(1, batch1f.size);
+ assertEquals(4, batch1f.selected[0]);
+
+ // Select none in 1st child, one as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1g = getBatchThreeBooleanCols();
+
+ SelectColumnNothing expr1g = new SelectColumnNothing(0);
+ SelectColumnOne expr2g = new SelectColumnOne(0, 2);
+ SelectColumnNothing expr3g = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr7 = new FilterExprOrExpr();
+ orExpr7.setChildExpressions(new VectorExpression[] {expr1g, expr2g, expr3g});
+
+ orExpr7.evaluate(batch1g);
+
+ assertEquals(1, batch1g.size);
+ assertEquals(2, batch1g.selected[0]);
+ }
+
+ @Test
    public void testFilterExprOrExprWithBatchReuse() {
      VectorizedRowBatch batch1 = getBatchThreeBooleanCols();

Search Discussions

Related Discussions

Discussion Navigation
view thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Aug 10, '15 at 2:23a
active: Aug 10, '15 at 2:23a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Mmccline: 1 post

People

Translate

site design / logo © 2021 Grokbase