FAQ
Author: ehans
Date: Sat Jan 18 01:32:16 2014
New Revision: 1559303

URL: http://svn.apache.org/r1559303
Log:
HIVE-6186: error in vectorized Column-Column comparison filter for repeating case (Eric Hanson)

Modified:
     hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java

Modified: hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt (original)
+++ hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt Sat Jan 18 01:32:16 2014
@@ -19,6 +19,7 @@
  package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;

  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
  import org.apache.hadoop.hive.ql.exec.vector.*;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -64,70 +65,43 @@ public class <ClassName> extends VectorE
        return;
      }

- if (inputColVector1.noNulls && inputColVector2.noNulls) {
- if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- //All must be selected otherwise size would be zero
- //Repeating property will not change.
- if (!(vector1[0] <OperatorSymbol> vector2[0])) {
- batch.size = 0;
- }
- } else if (inputColVector1.isRepeating) {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
- }
- } else if (inputColVector2.isRepeating) {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
- }
- } else if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
+ // filter rows with NULL on left input
+ int newSize;
+ newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n);
+ if (newSize < n) {
+ n = batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+
+ // filter rows with NULL on right input
+ newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n);
+ if (newSize < n) {
+ n = batch.size = newSize;
+ batch.selectedInUse = true;
+ }
+
+ // All rows with nulls have been filtered out, so just do normal filter for non-null case
+ if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) {
+
+ // All must be selected otherwise size would be zero
+ // Repeating property will not change.
+ if (!(vector1[0] <OperatorSymbol> vector2[0])) {
+ batch.size = 0;
+ }
+ } else if (inputColVector1.isRepeating) {
+ if (batch.selectedInUse) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
            int i = sel[j];
- if (vector1[i] <OperatorSymbol> vector2[i]) {
+ if (vector1[0] <OperatorSymbol> vector2[i]) {
              sel[newSize++] = i;
            }
          }
          batch.size = newSize;
        } else {
- int newSize = 0;
+ newSize = 0;
          for(int i = 0; i != n; i++) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
+ if (vector1[0] <OperatorSymbol> vector2[i]) {
              sel[newSize++] = i;
            }
          }
@@ -136,88 +110,42 @@ public class <ClassName> extends VectorE
            batch.selectedInUse = true;
          }
        }
- } else if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
- if (nullPos1[0] || nullPos2[0]) {
- batch.size = 0;
- }
- } else if (inputColVector1.isRepeating) {
- if (nullPos1[0]) {
- batch.size = 0;
- } else {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (!nullPos2[i]) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- }
- batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (!nullPos2[i]) {
- if (vector1[0] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
+ } else if (inputColVector2.isRepeating) {
+ if (batch.selectedInUse) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (vector1[i] <OperatorSymbol> vector2[0]) {
+ sel[newSize++] = i;
            }
          }
- }
- } else if (inputColVector2.isRepeating) {
- if (nullPos2[0]) {
- batch.size = 0;
+ batch.size = newSize;
        } else {
- if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
- int i = sel[j];
- if (!nullPos1[i]) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
+ newSize = 0;
+ for(int i = 0; i != n; i++) {
+ if (vector1[i] <OperatorSymbol> vector2[0]) {
+ sel[newSize++] = i;
            }
+ }
+ if (newSize < batch.size) {
            batch.size = newSize;
- } else {
- int newSize = 0;
- for(int i = 0; i != n; i++) {
- if (!nullPos1[i]) {
- if (vector1[i] <OperatorSymbol> vector2[0]) {
- sel[newSize++] = i;
- }
- }
- }
- if (newSize < batch.size) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
+ batch.selectedInUse = true;
          }
        }
      } else if (batch.selectedInUse) {
- int newSize = 0;
- for(int j=0; j != n; j++) {
+ newSize = 0;
+ for(int j = 0; j != n; j++) {
          int i = sel[j];
- if (!nullPos1[i] && !nullPos2[i]) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
+ if (vector1[i] <OperatorSymbol> vector2[i]) {
+ sel[newSize++] = i;
          }
        }
        batch.size = newSize;
      } else {
- int newSize = 0;
+ newSize = 0;
        for(int i = 0; i != n; i++) {
- if (!nullPos1[i] && !nullPos2[i]) {
- if (vector1[i] <OperatorSymbol> vector2[i]) {
- sel[newSize++] = i;
- }
+ if (vector1[i] <OperatorSymbol> vector2[i]) {
+ sel[newSize++] = i;
          }
        }
        if (newSize < batch.size) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Sat Jan 18 01:32:16 2014
@@ -336,4 +336,39 @@ public class NullUtil {
        Arrays.fill(v.isNull, 0, n, false);
      }
    }
+
+ /**
+ * Filter out rows with null values. Return the number of rows remaining in the batch after filtering.
+ */
+ public static int filterNulls(ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+ int newSize = 0;
+
+ if (v.noNulls) {
+
+ // no rows will be filtered
+ return n;
+ }
+
+ if (v.isRepeating) {
+
+ // all rows are filtered if repeating null, otherwise no rows are filtered
+ return v.isNull[0] ? 0 : n;
+ }
+
+ if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ return newSize;
+ }
  }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java?rev=1559303&r1=1559302&r2=1559303&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java Sat Jan 18 01:32:16 2014
@@ -70,31 +70,60 @@ public class TestVectorFilterExpressions
    }

    @Test
- public void testFilterLongColEqualLongColumn() {
+ public void testFilterLongColGreaterLongColumn() {
      int seed = 17;
- VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
+ VectorizedRowBatch b = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
          VectorizedRowBatch.DEFAULT_SIZE,
          2, seed);
- LongColumnVector lcv0 = (LongColumnVector) vrg.cols[0];
- LongColumnVector lcv1 = (LongColumnVector) vrg.cols[1];
+ LongColumnVector lcv0 = (LongColumnVector) b.cols[0];
+ LongColumnVector lcv1 = (LongColumnVector) b.cols[1];
+ b.size = 3;
      FilterLongColGreaterLongColumn expr = new FilterLongColGreaterLongColumn(0, 1);

- //Basic case
- lcv0.vector[1] = 23;
- lcv1.vector[1] = 19;
- lcv0.vector[5] = 23;
- lcv1.vector[5] = 19;
- expr.evaluate(vrg);
- assertEquals(2, vrg.size);
- assertEquals(1, vrg.selected[0]);
- assertEquals(5, vrg.selected[1]);
+ // Basic case
+ lcv0.vector[0] = 10;
+ lcv0.vector[1] = 10;
+ lcv0.vector[2] = 10;
+ lcv1.vector[0] = 20;
+ lcv1.vector[1] = 1;
+ lcv1.vector[2] = 7;
+
+ expr.evaluate(b);
+ assertEquals(2, b.size);
+ assertEquals(1, b.selected[0]);
+ assertEquals(2, b.selected[1]);

- //handle null
+ // handle null with selected in use
      lcv0.noNulls = false;
      lcv0.isNull[1] = true;
- expr.evaluate(vrg);
- assertEquals(1, vrg.size);
- assertEquals(5, vrg.selected[0]);
+ expr.evaluate(b);
+ assertEquals(1, b.size);
+ assertEquals(2, b.selected[0]);
+
+ // handle repeating
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isRepeating = true;
+ lcv0.noNulls = true;
+ expr.evaluate(b);
+ assertEquals(2, b.size);
+
+ // handle repeating null
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isNull[0] = true;
+ lcv0.noNulls = false;
+ expr.evaluate(b);
+ assertEquals(0, b.size);
+
+ // handle null on both sides (not repeating)
+ b.size = 3;
+ b.selectedInUse = false;
+ lcv0.isRepeating = false;
+ lcv1.noNulls = false;
+ lcv1.isNull[2] = true;
+ expr.evaluate(b);
+ assertEquals(0, b.size);
    }

    @Test

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJan 18, '14 at 1:32a
activeJan 18, '14 at 1:32a
posts1
users1
websitehive.apache.org

1 user in discussion

Ehans: 1 post

People

Translate

site design / logo © 2021 Grokbase