FAQ
Author: ehans
Date: Wed Dec 4 01:15:55 2013
New Revision: 1547666

URL: http://svn.apache.org/r1547666
Log:
HIVE-5877: Implement vectorized support for IN as boolean-valued expression (Eric Hanson)

Added:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Dec 4 01:15:55 2013
@@ -550,7 +550,7 @@ public class VectorizationContext {
      if (udf instanceof GenericUDFBetween) {
        return getBetweenFilterExpression(childExpr, mode);
      } else if (udf instanceof GenericUDFIn) {
- return getInFilterExpression(childExpr);
+ return getInExpression(childExpr, mode);
      } else if (udf instanceof GenericUDFOPPositive) {
        return getIdentityExpression(childExpr);
      } else if (udf instanceof GenericUDFBridge) {
@@ -575,11 +575,9 @@ public class VectorizationContext {
    }

    /**
- * Create a filter expression for column IN ( <list-of-constants> )
- * @param childExpr
- * @return
+ * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
     */
- private VectorExpression getInFilterExpression(List<ExprNodeDesc> childExpr)
+ private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode)
        throws HiveException {
      ExprNodeDesc colExpr = childExpr.get(0);
      String colType = colExpr.getTypeString();
@@ -601,48 +599,41 @@ public class VectorizationContext {
      // determine class
      Class<?> cl = null;
      if (isIntFamily(colType)) {
- cl = FilterLongColumnInList.class;
+ cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
          inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
        }
- FilterLongColumnInList f = (FilterLongColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
+ expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+ ((ILongInExpr) expr).setInListValues(inVals);
      } else if (colType.equals("timestamp")) {
- cl = FilterLongColumnInList.class;
+ cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
          inVals[i] = getTimestampScalar(childrenForInList.get(i));
        }
- FilterLongColumnInList f = (FilterLongColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
+ expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+ ((ILongInExpr) expr).setInListValues(inVals);
      } else if (colType.equals("string")) {
- cl = FilterStringColumnInList.class;
+ cl = (mode == Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
        byte[][] inVals = new byte[childrenForInList.size()][];
        for (int i = 0; i != inVals.length; i++) {
          inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
        }
- FilterStringColumnInList f =(FilterStringColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
+ expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+ ((IStringInExpr) expr).setInListValues(inVals);
      } else if (isFloatFamily(colType)) {
- cl = FilterDoubleColumnInList.class;
+ cl = (mode == Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
        double[] inValsD = new double[childrenForInList.size()];
        for (int i = 0; i != inValsD.length; i++) {
          inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
        }
- FilterDoubleColumnInList f = (FilterDoubleColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inValsD);
- expr = f;
- } else {
- throw new HiveException("Type " + colType + " not supported for IN in vectorized mode");
+ expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
+ ((IDoubleInExpr) expr).setInListValues(inValsD);
      }
+
+ // Return the desired VectorExpression if found. Otherwise, return null to cause
+ // execution to fall back to row mode.
      return expr;
    }


Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Output a boolean value indicating if a column is IN a list of constants.
+ */
+public class DoubleColumnInList extends VectorExpression implements IDoubleInExpr {
+
+ private static final long serialVersionUID = 1L;
+
+ private int colNum;
+ private int outputColumn;
+ private double[] inListValues;
+
+ // The set object containing the IN list. This is optimized for lookup
+ // of the data type of the column.
+ private transient CuckooSetDouble inSet;
+
+ public DoubleColumnInList(int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ }
+
+ public DoubleColumnInList() {
+ super();
+ inSet = null;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ if (inSet == null) {
+ inSet = new CuckooSetDouble(inListValues.length);
+ inSet.load(inListValues);
+ }
+
+ DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] nullPos = inputColVector.isNull;
+ boolean[] outNulls = outputColVector.isNull;
+ int n = batch.size;
+ double[] vector = inputColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ outputColVector.isRepeating = false;
+ outputColVector.noNulls = inputColVector.noNulls;
+ if (inputColVector.noNulls) {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+ outputColVector.isRepeating = true;
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ } else {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ if (!nullPos[0]) {
+ outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+ outNulls[0] = false;
+ } else {
+ outNulls[0] = true;
+ }
+ outputColVector.isRepeating = true;
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outNulls[i] = nullPos[i];
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ } else {
+ System.arraycopy(nullPos, 0, outNulls, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ public int getColNum() {
+ return colNum;
+ }
+
+ public void setColNum(int colNum) {
+ this.colNum = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ public double[] getInListValues() {
+ return this.inListValues;
+ }
+
+ public void setInListValues(double[] a) {
+ this.inListValues = a;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // return null since this will be handled as a special case in VectorizationContext
+ return null;
+ }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java Wed Dec 4 01:15:55 2013
@@ -34,7 +34,7 @@ import java.util.regex.Pattern;
  /**
   * Evaluate IN filter on a batch for a vector of doubles.
   */
-public class FilterDoubleColumnInList extends VectorExpression {
+public class FilterDoubleColumnInList extends VectorExpression implements IDoubleInExpr {
    private static final long serialVersionUID = 1L;
    private int inputCol;
    private double[] inListValues;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java Wed Dec 4 01:15:55 2013
@@ -33,7 +33,8 @@ import java.util.regex.Pattern;
  /**
   * Evaluate IN filter on a batch for a vector of longs.
   */
-public class FilterLongColumnInList extends VectorExpression {
+public class FilterLongColumnInList extends VectorExpression implements ILongInExpr {
+
    private static final long serialVersionUID = 1L;
    private int inputCol;
    private long[] inListValues;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java Wed Dec 4 01:15:55 2013
@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
   * the inner loop, and there is a hash table implemented
   * with Cuckoo hashing that has fast lookup to do the IN test.
   */
-public class FilterStringColumnInList extends VectorExpression {
+public class FilterStringColumnInList extends VectorExpression implements IStringInExpr {
    private static final long serialVersionUID = 1L;
    private int inputCol;
    private byte[][] inListValues;

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IDoubleInExpr {
+ void setInListValues(double[] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface ILongInExpr {
+ void setInListValues(long[] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Interface used for both filter and non-filter versions of IN to simplify
+ * VectorizationContext code.
+ */
+public interface IStringInExpr {
+ void setInListValues(byte[][] inVals);
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Output a boolean value indicating if a column is IN a list of constants.
+ */
+public class LongColumnInList extends VectorExpression implements ILongInExpr {
+
+ private static final long serialVersionUID = 1L;
+
+ private int colNum;
+ private int outputColumn;
+ private long[] inListValues;
+
+ // The set object containing the IN list. This is optimized for lookup
+ // of the data type of the column.
+ private transient CuckooSetLong inSet;
+
+ public LongColumnInList(int colNum, int outputColumn) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ }
+
+ public LongColumnInList() {
+ super();
+ inSet = null;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ if (inSet == null) {
+ inSet = new CuckooSetLong(inListValues.length);
+ inSet.load(inListValues);
+ }
+
+ LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] nullPos = inputColVector.isNull;
+ boolean[] outNulls = outputColVector.isNull;
+ int n = batch.size;
+ long[] vector = inputColVector.vector;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ outputColVector.isRepeating = false;
+ outputColVector.noNulls = inputColVector.noNulls;
+ if (inputColVector.noNulls) {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+ outputColVector.isRepeating = true;
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ } else {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ if (!nullPos[0]) {
+ outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0;
+ outNulls[0] = false;
+ } else {
+ outNulls[0] = true;
+ }
+ outputColVector.isRepeating = true;
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outNulls[i] = nullPos[i];
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ } else {
+ System.arraycopy(nullPos, 0, outNulls, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ public int getColNum() {
+ return colNum;
+ }
+
+ public void setColNum(int colNum) {
+ this.colNum = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ public long[] getInListValues() {
+ return this.inListValues;
+ }
+
+ public void setInListValues(long [] a) {
+ this.inListValues = a;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // return null since this will be handled as a special case in VectorizationContext
+ return null;
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java?rev=1547666&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java Wed Dec 4 01:15:55 2013
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.io.Text;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Evaluate an IN boolean expression (not a filter) on a batch for a vector of strings.
+ * This is optimized so that no objects have to be created in
+ * the inner loop, and there is a hash table implemented
+ * with Cuckoo hashing that has fast lookup to do the IN test.
+ */
+public class StringColumnInList extends VectorExpression implements IStringInExpr {
+ private static final long serialVersionUID = 1L;
+ private int inputCol;
+ private int outputColumn;
+ private byte[][] inListValues;
+
+ // The set object containing the IN list. This is optimized for lookup
+ // of the data type of the column.
+ private transient CuckooSetBytes inSet;
+
+ public StringColumnInList() {
+ super();
+ inSet = null;
+ }
+
+ /**
+ * After construction you must call setInListValues() to add the values to the IN set.
+ */
+ public StringColumnInList(int colNum, int outputColumn) {
+ this.inputCol = colNum;
+ this.outputColumn = outputColumn;
+ inSet = null;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ if (inSet == null) {
+ inSet = new CuckooSetBytes(inListValues.length);
+ inSet.load(inListValues);
+ }
+
+ BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+ int[] sel = batch.selected;
+ boolean[] nullPos = inputColVector.isNull;
+ int n = batch.size;
+ byte[][] vector = inputColVector.vector;
+ int[] start = inputColVector.start;
+ int[] len = inputColVector.length;
+ long[] outputVector = outputColVector.vector;
+
+ // return immediately if batch is empty
+ if (n == 0) {
+ return;
+ }
+
+ outputColVector.isRepeating = inputColVector.isRepeating;
+ outputColVector.noNulls = inputColVector.noNulls;
+ if (inputColVector.noNulls) {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
+ }
+ } else {
+ if (inputColVector.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ if (!nullPos[0]) {
+ outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0;
+ }
+ outputColVector.isNull[0] = nullPos[0];
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
+ outputColVector.isNull[i] = nullPos[i];
+ }
+ } else {
+ System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!nullPos[i]) {
+ outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0;
+ }
+ }
+ }
+ }
+ }
+
+
+ @Override
+ public String getOutputType() {
+ return "boolean";
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return this.outputColumn;
+ }
+
+ public void setOutputColumn(int value) {
+ this.outputColumn = value;
+ }
+
+ public int getInputCol() {
+ return inputCol;
+ }
+
+ public void setInputCol(int colNum) {
+ this.inputCol = colNum;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+
+ // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null;
+ }
+
+ public byte[][] getInListValues() {
+ return this.inListValues;
+ }
+
+ public void setInListValues(byte [][] a) {
+ this.inListValues = a;
+ }
+}

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java Wed Dec 4 01:15:55 2013
@@ -31,6 +31,7 @@ import junit.framework.Assert;

  import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble;
@@ -38,12 +39,14 @@ import org.apache.hadoop.hive.ql.exec.ve
  import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLTrim;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLower;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper;
@@ -957,8 +960,9 @@ public class TestVectorizationContext {
      assertTrue(ve instanceof FilterLongColumnNotBetween);
    }

+ // Test translation of both IN filters and boolean-valued IN expressions (non-filters).
    @Test
- public void testInFilters() throws HiveException {
+ public void testInFiltersAndExprs() throws HiveException {
      ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
      ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
      ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo");
@@ -979,6 +983,8 @@ public class TestVectorizationContext {
      VectorizationContext vc = new VectorizationContext(columnMap, 2);
      VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
      assertTrue(ve instanceof FilterStringColumnInList);
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+ assertTrue(ve instanceof StringColumnInList);

      // long IN
      children1.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
@@ -986,6 +992,8 @@ public class TestVectorizationContext {
      children1.set(2, new ExprNodeConstantDesc(20));
      ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
      assertTrue(ve instanceof FilterLongColumnInList);
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+ assertTrue(ve instanceof LongColumnInList);

      // double IN
      children1.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
@@ -993,5 +1001,7 @@ public class TestVectorizationContext {
      children1.set(2, new ExprNodeConstantDesc(20d));
      ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
      assertTrue(ve instanceof FilterDoubleColumnInList);
+ ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
+ assertTrue(ve instanceof DoubleColumnInList);
    }
  }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java Wed Dec 4 01:15:55 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec.v
  import static org.junit.Assert.assertEquals;

  import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  import org.junit.Assert;
@@ -460,4 +461,67 @@ public class TestVectorLogicalExpression

      assertEquals(2, batch1.selected[0]);
    }
+
+ @Test
+ public void testLongInExpr() {
+
+ // check basic operation
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+ LongColumnVector outV = (LongColumnVector) b.cols[1];
+ long[] inVals = new long[2];
+ inVals[0] = 0;
+ inVals[1] = -2;
+ LongColumnInList expr = new LongColumnInList(0, 1);
+ expr.setInListValues(inVals);
+ expr.evaluate(b);
+ assertEquals(1, outV.vector[0]);
+ assertEquals(0, outV.vector[1]);
+
+ // check null handling
+ b.cols[0].noNulls = false;
+ b.cols[0].isNull[0] = true;
+ expr.evaluate(b);
+ assertEquals(true, !outV.noNulls && outV.isNull[0]);
+ assertEquals(0, outV.vector[1]);
+
+ // check isRepeating handling
+ b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+ outV = (LongColumnVector) b.cols[1];
+ b.cols[0].isRepeating = true;
+ expr.evaluate(b);
+ assertEquals(true, outV.isRepeating);
+ assertEquals(1, outV.vector[0]);
+ }
+
+ @Test
+ public void testDoubleInExpr() {
+
+ // check basic operation
+ VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+ LongColumnVector outV = (LongColumnVector) b.cols[1];
+ double[] inVals = new double[2];
+ inVals[0] = -1.5d;
+ inVals[1] = 30d;
+ b.size = 2;
+ DoubleColumnInList expr = new DoubleColumnInList(0, 1);
+ expr.setInListValues(inVals);
+ expr.evaluate(b);
+ assertEquals(1, outV.vector[0]);
+ assertEquals(0, outV.vector[1]);
+
+ // check null handling
+ b.cols[0].noNulls = false;
+ b.cols[0].isNull[0] = true;
+ expr.evaluate(b);
+ assertEquals(true, !outV.noNulls && outV.isNull[0]);
+ assertEquals(0, outV.vector[1]);
+
+ // check isRepeating handling
+ b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+ outV = (LongColumnVector) b.cols[1];
+ b.cols[0].isRepeating = true;
+ expr.evaluate(b);
+ assertEquals(true, outV.isRepeating);
+ assertEquals(1, outV.vector[0]);
+ }
  }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java Wed Dec 4 01:15:55 2013
@@ -216,4 +216,29 @@ public class TestVectorScalarColArithmet
      assertFalse(out.noNulls);
      assertFalse(out.isRepeating);
    }
+
+ @Test
+ public void testBooleanValuedLongIn() {
+ VectorizedRowBatch batch = getBatch();
+ long[] a = new long[2];
+ a[0] = 20;
+ a[1] = 1000;
+ batch.size = 2;
+ VectorExpression expr = (new LongColumnInList(0, 1));
+ ((LongColumnInList) expr).setInListValues(a);
+ expr.evaluate(batch);
+ LongColumnVector out = (LongColumnVector) batch.cols[1];
+ Assert.assertEquals(0, out.vector[0]);
+ Assert.assertEquals(1, out.vector[1]);
+ }
+
+ private VectorizedRowBatch getBatch() {
+ VectorizedRowBatch b = new VectorizedRowBatch(2);
+ LongColumnVector v = new LongColumnVector();
+ v.vector[0] = 10;
+ v.vector[1] = 20;
+ b.cols[0] = v;
+ b.cols[1] = new LongColumnVector();
+ return b;
+ }
  }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1547666&r1=1547665&r2=1547666&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java Wed Dec 4 01:15:55 2013
@@ -1830,4 +1830,43 @@ public class TestVectorStringExpressions
      b.size = 5;
      return b;
    }
+
+ // Test boolean-valued (non-filter) IN expression for strings
+ @Test
+ public void testStringInExpr() {
+
+ // test basic operation
+ VectorizedRowBatch b = makeStringBatch();
+ b.size = 2;
+ b.cols[0].noNulls = true;
+ byte[][] inVals = new byte[2][];
+ inVals[0] = red;
+ inVals[1] = blue;
+ StringColumnInList expr = new StringColumnInList(0, 2);
+ expr.setInListValues(inVals);
+ expr.evaluate(b);
+ LongColumnVector outV = (LongColumnVector) b.cols[2];
+ Assert.assertEquals(1, outV.vector[0]);
+ Assert.assertEquals(0, outV.vector[1]);
+
+ // test null input
+ b = makeStringBatch();
+ b.size = 2;
+ b.cols[0].noNulls = false;
+ b.cols[0].isNull[0] = true;
+ expr.evaluate(b);
+ outV = (LongColumnVector) b.cols[2];
+ Assert.assertEquals(true, !outV.noNulls && outV.isNull[0] && !outV.isNull[1]);
+ Assert.assertEquals(0, outV.vector[1]);
+
+ // test repeating logic
+ b = makeStringBatch();
+ b.size = 2;
+ b.cols[0].noNulls = true;
+ b.cols[0].isRepeating = true;
+ expr.evaluate(b);
+ outV = (LongColumnVector) b.cols[2];
+ Assert.assertEquals(1, outV.vector[0]);
+ Assert.assertEquals(true, outV.isRepeating);
+ }
   }

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Dec 4, '13 at 1:16a
active: Dec 4, '13 at 1:16a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ehans: 1 post

People

Translate

site design / logo © 2021 Grokbase