FAQ
Author: ehans
Date: Wed Jan 8 18:15:44 2014
New Revision: 1556595

URL: http://svn.apache.org/r1556595
Log:
HIVE-6051: Create DecimalColumnVector and a representative VectorExpression for decimal (Eric Hanson)

Added:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java
Modified:
     hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/Decimal128.java Wed Jan 8 18:15:44 2014
@@ -1604,4 +1604,13 @@ public final class Decimal128 extends Nu
          + signum + ", BigDecimal.toString=" + toBigDecimal().toString()
          + ", unscaledValue=[" + unscaledValue.toString() + "])";
    }
+
+ /**
+ * Overwrite this value with the placeholder used for NULL entries in
+ * vectorized execution.
+ *
+ * Vectorized execution uses the smallest possible positive non-zero
+ * value to prevent possible later zero-divide exceptions. Set the field
+ * to this value (1 in the internal unsigned 128 bit int). Because only
+ * the unscaled value is written, the resulting decimal depends on the
+ * current scale.
+ *
+ * NOTE(review): only unscaledValue is updated here; signum is not
+ * touched, so this presumably relies on the value already being
+ * positive -- confirm the behavior for zero or negative values.
+ */
+ public void setNullDataValue() {
+ unscaledValue.update(1, 0, 0, 0);
+ }
  }

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java?rev=1556595&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java Wed Jan 8 18:15:44 2014
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Column vector that stores decimal values as Decimal128 objects. These are
+ * mutable and have fairly efficient operations on them, which makes it faster
+ * to load column vectors and perform decimal vector operations with
+ * decimal-specific VectorExpressions.
+ */
+public class DecimalColumnVector extends ColumnVector {
+
+  /**
+   * A vector of Decimal128 objects, one per slot of the batch.
+   *
+   * For high performance and easy access to this low-level structure,
+   * the fields are public by design (as they are in other ColumnVector
+   * types).
+   */
+  public Decimal128[] vector;
+
+  /** Scale passed to the constructor; every slot is created with it. */
+  public short scale;
+
+  /** Precision passed to the constructor. */
+  public short precision;
+
+  /**
+   * Allocate VectorizedRowBatch.DEFAULT_SIZE slots and fill each one with a
+   * Decimal128 constructed from 0 at the given scale.
+   *
+   * @param precision precision of the column; narrowed to short when stored
+   * @param scale scale of the column; narrowed to short when stored
+   */
+  public DecimalColumnVector(int precision, int scale) {
+    super(VectorizedRowBatch.DEFAULT_SIZE);
+    this.scale = (short) scale;
+    this.precision = (short) precision;
+    vector = new Decimal128[VectorizedRowBatch.DEFAULT_SIZE];
+    for (int slot = 0; slot != vector.length; slot++) {
+      vector[slot] = new Decimal128(0, this.scale);
+    }
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @param index slot to read (currently ignored)
+   * @return always null
+   */
+  @Override
+  public Writable getWritableObject(int index) {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+  /**
+   * Not implemented yet; currently does nothing.
+   */
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    // TODO Auto-generated method stub
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java?rev=1556595&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColAddDecimalColumn.java Wed Jan 8 18:15:44 2014
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+
+/**
+ * Generated from template ColumnArithmeticColumn.txt, which covers binary arithmetic
+ * expressions between columns.
+ *
+ * Adds two decimal columns entry by entry and stores the sum in a decimal
+ * output column. Null handling is delegated to NullUtil.propagateNullsColCol;
+ * a sum that overflows the output precision is marked NULL in the output.
+ */
+public class DecimalColAddDecimalColumn extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+
+  /**
+   * @param colNum1 index in batch.cols of the left operand column
+   * @param colNum2 index in batch.cols of the right operand column
+   * @param outputColumn index in batch.cols of the column receiving the sum
+   */
+  public DecimalColAddDecimalColumn(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * No-argument constructor; the column indexes can be supplied afterwards
+   * through the setters.
+   */
+  public DecimalColAddDecimalColumn() {
+  }
+
+  /**
+   * Evaluate child expressions (if any), then compute
+   * cols[colNum1] + cols[colNum2] into cols[outputColumn] for the entries
+   * of the batch that are in use.
+   *
+   * @param batch the batch to operate on; all three columns involved must be
+   *        DecimalColumnVector instances
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1];
+    DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2];
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    Decimal128[] vector1 = inputColVector1.vector;
+    Decimal128[] vector2 = inputColVector2.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // The output repeats when both inputs repeat, or when either input is a
+    // repeating NULL (every sum is then NULL).
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      addChecked(0, vector1[0], vector2[0], outputColVector);
+    } else if (inputColVector1.isRepeating) {
+      if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[0], vector2[i], outputColVector);
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          addChecked(i, vector1[0], vector2[i], outputColVector);
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[i], vector2[0], outputColVector);
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          addChecked(i, vector1[i], vector2[0], outputColVector);
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for (int j = 0; j != n; j++) {
+          int i = sel[j];
+          addChecked(i, vector1[i], vector2[i], outputColVector);
+        }
+      } else {
+        for (int i = 0; i != n; i++) {
+          addChecked(i, vector1[i], vector2[i], outputColVector);
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data value of a null decimal entry is
+     * the smallest positive non-zero value (see Decimal128.setNullDataValue()).
+     * This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  /**
+   * Addition with overflow check. Overflow produces NULL output.
+   *
+   * @param i index of the output entry to write
+   * @param left left operand
+   * @param right right operand
+   * @param outputColVector vector receiving the sum at index i; its scale and
+   *        precision drive the addition and the overflow check
+   */
+  private static void addChecked(int i, Decimal128 left, Decimal128 right,
+      DecimalColumnVector outputColVector) {
+    try {
+      Decimal128.add(left, right, outputColVector.vector[i], outputColVector.scale);
+      outputColVector.vector[i].checkPrecisionOverflow(outputColVector.precision);
+    } catch (ArithmeticException e) { // catch on overflow
+      // NOTE(review): clearing noNulls makes every isNull[] entry significant;
+      // presumably the other entries already hold valid flags at this point --
+      // confirm against NullUtil.propagateNullsColCol.
+      outputColVector.noNulls = false;
+      outputColVector.isNull[i] = true;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  /**
+   * @return "decimal", matching the DecimalColumnVector this expression
+   *         writes to and the decimal argument types in the descriptor
+   */
+  @Override
+  public String getOutputType() {
+    return "decimal";
+  }
+
+  public int getColNum1() {
+    return colNum1;
+  }
+
+  public void setColNum1(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  public int getColNum2() {
+    return colNum2;
+  }
+
+  public void setColNum2(int colNum2) {
+    this.colNum2 = colNum2;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * @return a PROJECTION-mode descriptor for two decimal COLUMN arguments
+   */
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("decimal"),
+            VectorExpressionDescriptor.ArgumentType.getType("decimal"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Wed Jan 8 18:15:44 2014
@@ -18,6 +18,8 @@

  package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
@@ -27,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.ve
   * Utility functions to handle null propagation.
   */
  public class NullUtil {
+
    /**
     * Set the data value for all NULL entries to the designated NULL_VALUE.
     */
@@ -56,6 +59,7 @@ public class NullUtil {
    public static void setNullOutputEntriesColScalar(
        ColumnVector v, boolean selectedInUse, int[] sel, int n) {
      if (v instanceof DoubleColumnVector) {
+
        // No need to set null data entries because the input NaN values
        // will automatically propagate to the output.
        return;
@@ -285,4 +289,32 @@ public class NullUtil {
        }
      }
    }
+
+ /**
+  * Store the decimal NULL placeholder (see Decimal128.setNullDataValue()) in
+  * the data slot of every NULL entry of the vector. By convention null decimal
+  * values are internally set to the smallest positive value available, which
+  * prevents accidental zero-divide later in expression evaluation.
+  *
+  * @param v vector to patch; left untouched when v.noNulls is set
+  * @param selectedInUse whether sel lists the entries in use
+  * @param sel indexes of the entries in use; read only when selectedInUse is true
+  * @param n number of entries to visit
+  */
+ public static void setNullDataEntriesDecimal(
+     DecimalColumnVector v, boolean selectedInUse, int[] sel,
+     int n) {
+   if (v.noNulls) {
+     return;
+   }
+
+   // A repeating NULL only needs its single representative entry patched.
+   if (v.isRepeating && v.isNull[0]) {
+     v.vector[0].setNullDataValue();
+     return;
+   }
+
+   if (!selectedInUse) {
+     for (int row = 0; row != n; row++) {
+       if (v.isNull[row]) {
+         v.vector[row].setNullDataValue();
+       }
+     }
+     return;
+   }
+
+   for (int pos = 0; pos != n; pos++) {
+     final int row = sel[pos];
+     if (v.isNull[row]) {
+       v.vector[row].setNullDataValue();
+     }
+   }
+ }
  }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java?rev=1556595&r1=1556594&r2=1556595&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java Wed Jan 8 18:15:44 2014
@@ -23,6 +23,8 @@ import static org.junit.Assert.assertFal
  import static org.junit.Assert.assertTrue;
  import junit.framework.Assert;

+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
@@ -33,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.ve
  import org.junit.Test;

  /**
- * Unit tests for vectori arithmetic expressions.
+ * Unit tests for vectorized arithmetic expressions.
   */
  public class TestVectorArithmeticExpressions {

@@ -284,4 +286,60 @@ public class TestVectorArithmeticExpress
      assertFalse(out.noNulls);
      assertFalse(out.isRepeating);
    }
+
+ /**
+  * Checks decimal column addition for plain values, for NULL propagation from
+  * an input column, and for overflow turning into NULL.
+  */
+ @Test
+ public void testDecimalColAddDecimalColumn() {
+   final short scale = 2;
+   VectorExpression add = new DecimalColAddDecimalColumn(0, 1, 2);
+
+   // test without nulls
+   VectorizedRowBatch batch = getVectorizedRowBatch3DecimalCols();
+   DecimalColumnVector sum = (DecimalColumnVector) batch.cols[2];
+   add.evaluate(batch);
+   assertTrue(sum.vector[0].equals(new Decimal128("2.20", scale)));
+   assertTrue(sum.vector[1].equals(new Decimal128("-2.30", scale)));
+   assertTrue(sum.vector[2].equals(new Decimal128("1.00", scale)));
+
+   // test nulls propagation
+   batch = getVectorizedRowBatch3DecimalCols();
+   DecimalColumnVector left = (DecimalColumnVector) batch.cols[0];
+   left.noNulls = false;
+   left.isNull[0] = true;
+   sum = (DecimalColumnVector) batch.cols[2];
+   add.evaluate(batch);
+   assertFalse(sum.noNulls);
+   assertTrue(sum.isNull[0]);
+
+   // Verify null output data entry is not 0, but rather the value specified by design,
+   // which is the minimum non-0 value, 0.01 in this case.
+   assertTrue(sum.vector[0].equals(new Decimal128("0.01", scale)));
+
+   // test that overflow produces NULL
+   batch = getVectorizedRowBatch3DecimalCols();
+   left = (DecimalColumnVector) batch.cols[0];
+   left.vector[0].update("9999999999999999.99", scale); // set to max possible value
+   sum = (DecimalColumnVector) batch.cols[2];
+   add.evaluate(batch); // will cause overflow for result at position 0, must yield NULL
+   assertFalse(sum.noNulls);
+   assertTrue(sum.isNull[0]);
+
+   // verify proper null output data value
+   assertTrue(sum.vector[0].equals(new Decimal128("0.01", scale)));
+ }
+
+ private VectorizedRowBatch getVectorizedRowBatch3DecimalCols() {
+ VectorizedRowBatch b = new VectorizedRowBatch(3);
+ DecimalColumnVector v0, v1;
+ b.cols[0] = v0 = new DecimalColumnVector(18, 2);
+ b.cols[1] = v1 = new DecimalColumnVector(18, 2);
+ b.cols[2] = new DecimalColumnVector(18, 2);
+ v0.vector[0].update("1.20", (short) 2);
+ v0.vector[1].update("-3.30", (short) 2);
+ v0.vector[2].update("0", (short) 2);
+
+ v1.vector[0].update("1.00", (short) 2);
+ v1.vector[1].update("1.00", (short) 2);
+ v1.vector[2].update("1.00", (short) 2);
+
+ b.size = 3;
+
+ return b;
+ }
  }

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jan 8, '14 at 6:22p
active: Jan 8, '14 at 6:22p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ehans: 1 post

People

Translate

site design / logo © 2021 Grokbase