FAQ
Author: prasanthj
Date: Fri Dec 19 19:22:20 2014
New Revision: 1646836

URL: http://svn.apache.org/r1646836
Log:
HIVE-9166: Place an upper bound for SARG CNF conversion (Prasanth Jayachandran reviewed by Owen O'Malley)

Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Fri Dec 19 19:22:20 2014
@@ -22,16 +22,6 @@ import com.esotericsoftware.kryo.Kryo;
  import com.esotericsoftware.kryo.io.Input;
  import com.esotericsoftware.kryo.io.Output;

-import java.math.BigDecimal;
-import java.sql.Timestamp;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
  import org.apache.commons.codec.binary.Base64;
  import org.apache.commons.lang.StringUtils;
  import org.apache.commons.logging.Log;
@@ -64,6 +54,16 @@ import org.apache.hadoop.hive.serde2.obj
  import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
  import parquet.filter2.predicate.FilterApi;
  import parquet.filter2.predicate.FilterPredicate;

@@ -415,6 +415,8 @@ final class SearchArgumentImpl implement
    }

    static class ExpressionBuilder {
+ // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe
+ private static final int CNF_COMBINATIONS_THRESHOLD = 256;
      private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();

      /**
@@ -844,14 +846,29 @@ final class SearchArgumentImpl implement
              }
            }
            if (!andList.isEmpty()) {
- root = new ExpressionTree(ExpressionTree.Operator.AND);
- generateAllCombinations(root.children, andList, nonAndList);
+ if (checkCombinationsThreshold(andList)) {
+ root = new ExpressionTree(ExpressionTree.Operator.AND);
+ generateAllCombinations(root.children, andList, nonAndList);
+ } else {
+ root = new ExpressionTree(TruthValue.YES_NO_NULL);
+ }
            }
          }
        }
        return root;
      }

+ private static boolean checkCombinationsThreshold(List<ExpressionTree> andList) {
+ int numComb = 1;
+ for (ExpressionTree tree : andList) {
+ numComb *= tree.children.size();
+ if (numComb > CNF_COMBINATIONS_THRESHOLD) {
+ return false;
+ }
+ }
+ return true;
+ }
+
      /**
       * Converts multi-level ands and ors into single level ones.
       * @param root the expression to flatten

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java Fri Dec 19 19:22:20 2014
@@ -18,7 +18,12 @@

  package org.apache.hadoop.hive.ql.io.sarg;

+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
+import static junit.framework.Assert.assertTrue;
+
  import com.google.common.collect.Sets;
+
  import org.apache.hadoop.hive.common.type.HiveChar;
  import org.apache.hadoop.hive.common.type.HiveDecimal;
  import org.apache.hadoop.hive.common.type.HiveVarchar;
@@ -27,8 +32,7 @@ import org.apache.hadoop.hive.ql.io.sarg
  import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
  import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.junit.*;
-import parquet.filter2.predicate.FilterPredicate;
+import org.junit.Test;

  import java.beans.XMLDecoder;
  import java.io.ByteArrayInputStream;
@@ -37,9 +41,7 @@ import java.math.BigDecimal;
  import java.util.List;
  import java.util.Set;

-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNull;
-import static junit.framework.Assert.assertTrue;
+import parquet.filter2.predicate.FilterPredicate;

  /**
   * These test the SARG implementation.
@@ -175,6 +177,17 @@ public class TestSearchArgumentImpl {
      assertEquals("(and leaf-1)",
          ExpressionBuilder.foldMaybe(and(or(leaf(2),
              constant(TruthValue.YES_NO_NULL)), leaf(1))).toString());
+ assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe(
+ ExpressionBuilder.convertToCNF(and(leaf(100),
+ or(and(leaf(0), leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17)))))).toString());
    }

    @Test
@@ -236,6 +249,25 @@ public class TestSearchArgumentImpl {
              and(leaf(3), leaf(4), leaf(5)),
              and(leaf(6), leaf(7)),
              leaf(8))).toString());
+ assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17)))).toString());
+ assertEquals("(and leaf-100 YES_NO_NULL)", ExpressionBuilder.convertToCNF(and(leaf(100),
+ or(and(leaf(0), leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17))))).toString());
      assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0), leaf(1), leaf(2)),
          and(leaf(3), leaf(4), leaf(5)),
          and(leaf(6), leaf(7)),

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedDec 19, '14 at 7:22p
activeDec 19, '14 at 7:22p
posts1
users1
websitehive.apache.org

1 user in discussion

Prasanthj: 1 post

People

Translate

site design / logo © 2021 Grokbase