FAQ
http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 82d9600..79a627c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -19,10 +19,10 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

  import java.util.ArrayList;
  import java.util.EnumSet;
+import java.util.HashSet;
  import java.util.List;
  import java.util.Set;

-import org.apache.calcite.plan.RelOptPredicateList;
  import org.apache.calcite.plan.RelOptRule;
  import org.apache.calcite.plan.RelOptRuleCall;
  import org.apache.calcite.plan.RelOptUtil;
@@ -30,7 +30,6 @@ import org.apache.calcite.rel.RelNode;
  import org.apache.calcite.rel.core.Filter;
  import org.apache.calcite.rel.core.RelFactories.FilterFactory;
  import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.metadata.RelMetadataQuery;
  import org.apache.calcite.rex.RexBuilder;
  import org.apache.calcite.rex.RexCall;
  import org.apache.calcite.rex.RexInputRef;
@@ -40,6 +39,7 @@ import org.apache.calcite.rex.RexUtil;
  import org.apache.calcite.sql.SqlKind;
  import org.slf4j.Logger;
  import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
  import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;

  import com.google.common.collect.ImmutableList;
@@ -47,31 +47,24 @@ import com.google.common.collect.LinkedHashMultimap;
  import com.google.common.collect.Multimap;
  import com.google.common.collect.Sets;

-
  public class HivePreFilteringRule extends RelOptRule {

- protected static final Logger LOG = LoggerFactory
- .getLogger(HivePreFilteringRule.class.getName());
-
-
- public static final HivePreFilteringRule INSTANCE =
- new HivePreFilteringRule();
+ protected static final Logger LOG = LoggerFactory
+ .getLogger(HivePreFilteringRule.class
+ .getName());

- private final FilterFactory filterFactory;
+ public static final HivePreFilteringRule INSTANCE = new HivePreFilteringRule();

+ private final FilterFactory filterFactory;

- private static final Set<SqlKind> COMPARISON = EnumSet.of(
- SqlKind.EQUALS,
- SqlKind.GREATER_THAN_OR_EQUAL,
- SqlKind.LESS_THAN_OR_EQUAL,
- SqlKind.GREATER_THAN,
- SqlKind.LESS_THAN,
- SqlKind.NOT_EQUALS);
-
+ private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS,
+ SqlKind.GREATER_THAN_OR_EQUAL,
+ SqlKind.LESS_THAN_OR_EQUAL,
+ SqlKind.GREATER_THAN, SqlKind.LESS_THAN,
+ SqlKind.NOT_EQUALS);

    private HivePreFilteringRule() {
- super(operand(Filter.class,
- operand(RelNode.class, any())));
+ super(operand(Filter.class, operand(RelNode.class, any())));
      this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY;
    }

@@ -86,8 +79,7 @@ public class HivePreFilteringRule extends RelOptRule {
        return false;
      }

- HiveRulesRegistry registry = call.getPlanner().
- getContext().unwrap(HiveRulesRegistry.class);
+ HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);

      // If this operator has been visited already by the rule,
      // we do not need to apply the optimization
@@ -103,69 +95,95 @@ public class HivePreFilteringRule extends RelOptRule {
      final Filter filter = call.rel(0);

      // 0. Register that we have visited this operator in this rule
- HiveRulesRegistry registry = call.getPlanner().
- getContext().unwrap(HiveRulesRegistry.class);
+ HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
      if (registry != null) {
        registry.registerVisited(this, filter);
      }

      final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();

- final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+ // 1. Recompose filter possibly by pulling out common elements from DNF
+ // expressions
+ RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+ // 2. We extract possible candidates to be pushed down
+ List<RexNode> operandsToPushDown = new ArrayList<>();
+ List<RexNode> deterministicExprs = new ArrayList<>();
+ List<RexNode> nonDeterministicExprs = new ArrayList<>();
+
+ switch (topFilterCondition.getKind()) {
+ case AND:
+ ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) topFilterCondition)
+ .getOperands());
+ Set<String> operandsToPushDownDigest = new HashSet<String>();
+ List<RexNode> extractedCommonOperands = null;
+
+ for (RexNode operand : operands) {
+ if (operand.getKind() == SqlKind.OR) {
+ extractedCommonOperands = extractCommonOperands(rexBuilder, operand);
+ for (RexNode extractedExpr : extractedCommonOperands) {
+ if (operandsToPushDownDigest.add(extractedExpr.toString())) {
+ operandsToPushDown.add(extractedExpr);
+ }
+ }
+ }

- // 1. We extract possible candidates to be pushed down
- List<RexNode> commonOperands = new ArrayList<>();
- switch (condition.getKind()) {
- case AND:
- ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands());
- for (RexNode operand: operands) {
- if (operand.getKind() == SqlKind.OR) {
- commonOperands.addAll(extractCommonOperands(rexBuilder,operand));
+ // TODO: Make expr traversal recursive. Extend to traverse inside
+ // elements of DNF/CNF & extract more deterministic pieces out.
+ if (HiveCalciteUtil.isDeterministic(operand)) {
+ deterministicExprs.add(operand);
+ } else {
+ nonDeterministicExprs.add(operand);
+ }
+ }
+
+ // Pull out Deterministic exprs from non-deterministic and push down
+ // deterministic expressions as a separate filter
+ // NOTE: Hive by convention doesn't pushdown non deterministic expressions
+ if (nonDeterministicExprs.size() > 0) {
+ for (RexNode expr : deterministicExprs) {
+ if (!operandsToPushDownDigest.contains(expr.toString())) {
+ operandsToPushDown.add(expr);
+ operandsToPushDownDigest.add(expr.toString());
            }
          }
- break;
- case OR:
- commonOperands = extractCommonOperands(rexBuilder,condition);
- break;
- default:
- return;
+
+ topFilterCondition = RexUtil.pullFactors(rexBuilder,
+ RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false));
+ }
+
+ break;
+
+ case OR:
+ operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition);
+ break;
+ default:
+ return;
      }

      // 2. If we did not generate anything for the new predicate, we bail out
- if (commonOperands.isEmpty()) {
+ if (operandsToPushDown.isEmpty()) {
        return;
      }

      // 3. If the new conjuncts are already present in the plan, we bail out
- final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput());
- final List<RexNode> newConjuncts = new ArrayList<>();
- for (RexNode commonOperand : commonOperands) {
- boolean found = false;
- for (RexNode conjunct : predicates.pulledUpPredicates) {
- if (commonOperand.toString().equals(conjunct.toString())) {
- found = true;
- break;
- }
- }
- if (!found) {
- newConjuncts.add(commonOperand);
- }
- }
+ final List<RexNode> newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(),
+ operandsToPushDown);
      if (newConjuncts.isEmpty()) {
        return;
      }

      // 4. Otherwise, we create a new condition
- final RexNode newCondition = RexUtil.pullFactors(rexBuilder,
- RexUtil.composeConjunction(rexBuilder, newConjuncts, false));
+ final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder,
+ RexUtil.composeConjunction(rexBuilder, newConjuncts, false));

      // 5. We create the new filter that might be pushed down
- RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition);
- RelNode newTopFilter = filterFactory.createFilter(newFilter, condition);
+ RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition);
+ RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition);

      // 6. We register both so we do not fire the rule on them again
      if (registry != null) {
- registry.registerVisited(this, newFilter);
+ registry.registerVisited(this, newChildFilter);
        registry.registerVisited(this, newTopFilter);
      }

@@ -175,13 +193,15 @@ public class HivePreFilteringRule extends RelOptRule {

    private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition) {
      assert condition.getKind() == SqlKind.OR;
- Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create();
+ Multimap<String, RexNode> reductionCondition = LinkedHashMultimap.create();

- // Data structure to control whether a certain reference is present in every operand
+ // Data structure to control whether a certain reference is present in every
+ // operand
      Set<String> refsInAllOperands = null;

- // 1. We extract the information necessary to create the predicate for the new
- // filter; currently we support comparison functions, in and between
+ // 1. We extract the information necessary to create the predicate for the
+ // new
+ // filter; currently we support comparison functions, in and between
      ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
      for (int i = 0; i < operands.size(); i++) {
        final RexNode operand = operands.get(i);
@@ -190,27 +210,27 @@ public class HivePreFilteringRule extends RelOptRule {
        final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);

        Set<String> refsInCurrentOperand = Sets.newHashSet();
- for (RexNode conjunction: conjunctions) {
+ for (RexNode conjunction : conjunctions) {
          // We do not know what it is, we bail out for safety
- if (!(conjunction instanceof RexCall)) {
+ if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) {
            return new ArrayList<>();
          }
          RexCall conjCall = (RexCall) conjunction;
          RexNode ref = null;
- if(COMPARISON.contains(conjCall.getOperator().getKind())) {
- if (conjCall.operands.get(0) instanceof RexInputRef &&
- conjCall.operands.get(1) instanceof RexLiteral) {
+ if (COMPARISON.contains(conjCall.getOperator().getKind())) {
+ if (conjCall.operands.get(0) instanceof RexInputRef
+ && conjCall.operands.get(1) instanceof RexLiteral) {
              ref = conjCall.operands.get(0);
- } else if (conjCall.operands.get(1) instanceof RexInputRef &&
- conjCall.operands.get(0) instanceof RexLiteral) {
+ } else if (conjCall.operands.get(1) instanceof RexInputRef
+ && conjCall.operands.get(0) instanceof RexLiteral) {
              ref = conjCall.operands.get(1);
            } else {
              // We do not know what it is, we bail out for safety
              return new ArrayList<>();
            }
- } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
+ } else if (conjCall.getOperator().getKind().equals(SqlKind.IN)) {
            ref = conjCall.operands.get(0);
- } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
+ } else if (conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
            ref = conjCall.operands.get(1);
          } else {
            // We do not know what it is, we bail out for safety
@@ -228,7 +248,8 @@ public class HivePreFilteringRule extends RelOptRule {
        } else {
          refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand);
        }
- // If we did not add any factor or there are no common factors, we can bail out
+ // If we did not add any factor or there are no common factors, we can
+ // bail out
        if (refsInAllOperands.isEmpty()) {
          return new ArrayList<>();
        }
@@ -237,7 +258,8 @@ public class HivePreFilteringRule extends RelOptRule {
      // 2. We gather the common factors and return them
      List<RexNode> commonOperands = new ArrayList<>();
      for (String ref : refsInAllOperands) {
- commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
+ commonOperands
+ .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
      }
      return commonOperands;
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index b52779c..c04060f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -17,6 +17,10 @@
   */
  package org.apache.hadoop.hive.ql.optimizer.calcite.stats;

+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
  import org.apache.calcite.plan.RelOptUtil;
  import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
  import org.apache.calcite.rel.RelNode;
@@ -31,8 +35,10 @@ import org.apache.calcite.sql.SqlKind;
  import org.apache.calcite.sql.SqlOperator;
  import org.apache.calcite.sql.type.SqlTypeUtil;
  import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
  import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
  import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;

  public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
    private final RelNode childRel;
@@ -81,6 +87,21 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
        break;
      }

+ case IS_NOT_NULL: {
+ if (childRel instanceof HiveTableScan) {
+ double noOfNulls = getMaxNulls(call, (HiveTableScan) childRel);
+ double totalNoOfTuples = childRel.getRows();
+ if (totalNoOfTuples >= noOfNulls) {
+ selectivity = (totalNoOfTuples - noOfNulls) / Math.max(totalNoOfTuples, 1);
+ } else {
+ throw new RuntimeException("Invalid Stats number of null > no of tuples");
+ }
+ } else {
+ selectivity = computeNotEqualitySelectivity(call);
+ }
+ break;
+ }
+
      case LESS_THAN_OR_EQUAL:
      case GREATER_THAN_OR_EQUAL:
      case LESS_THAN:
@@ -199,6 +220,33 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
      return selectivity;
    }

+   /**
+    * Returns the largest null count found among the columns referenced by
+    * {@code call}, using the column statistics reported by {@code t}.
+    *
+    * <p>Only the single worst column is considered; null counts of several
+    * referenced columns are not combined.
+    *
+    * TODO: improve this
+    *
+    * @param call expression whose input references select the columns to inspect
+    * @param t table scan that supplies the column statistics
+    * @return the maximum {@code getNumNulls()} over the returned statistics, or
+    *         0 when no statistics are returned
+    */
+   private long getMaxNulls(RexCall call, HiveTableScan t) {
+     final List<Integer> referencedColumns =
+         new ArrayList<Integer>(HiveCalciteUtil.getInputRefs(call));
+
+     long largestNullCount = 0;
+     for (ColStatistics columnStats : t.getColStat(referencedColumns)) {
+       largestNullCount = Math.max(largestNullCount, columnStats.getNumNulls());
+     }
+     return largestNullCount;
+   }
+
+
    private Double getMaxNDV(RexCall call) {
      double tmpNDV;
      double maxNDV = 1.0;

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
new file mode 100644
index 0000000..b7244fd
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -0,0 +1,645 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.linq4j.function.Predicate1;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPredicateList;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SemiJoin;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdPredicates;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexPermuteInputsShuttle;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+
+
+//TODO: Move this to calcite
+public class HiveRelMdPredicates extends RelMdPredicates {
+ public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.PREDICATES.method,
+ new HiveRelMdPredicates());
+
+ private static final List<RexNode> EMPTY_LIST = ImmutableList.of();
+
+   /**
+    * Infers predicates for a project.
+    *
+    * <ol>
+    * <li>create a mapping from input to projection. Map only positions that
+    * directly reference an input column.
+    * <li>Expressions that only contain above columns are retained in the
+    * Project's pullExpressions list.
+    * <li>For e.g. expression 'a + e = 9' below will not be pulled up because 'e'
+    * is not in the projection list.
+    *
+    * <pre>
+    * childPullUpExprs:      {a &gt; 7, b + c &lt; 10, a + e = 9}
+    * projectionExprs:       {a, b, c, e / 2}
+    * projectionPullupExprs: {a &gt; 7, b + c &lt; 10}
+    * </pre>
+    *
+    * </ol>
+    *
+    * <p>In addition, every projected literal (or deterministic function applied
+    * to literals) contributes a predicate on its output column:
+    * {@code IS NULL} for a null literal, {@code =} otherwise.
+    *
+    * @param project the project whose pulled-up predicates are requested
+    * @return the predicates known to hold on the output of {@code project}
+    */
+   @Override
+   public RelOptPredicateList getPredicates(Project project) {
+     RelNode child = project.getInput();
+     final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
+     RelOptPredicateList childInfo = RelMetadataQuery.getPulledUpPredicates(child);
+
+     List<RexNode> projectPullUpPredicates = new ArrayList<RexNode>();
+
+     // Record which input columns are projected as-is and where they land in
+     // the project's output row.
+     ImmutableBitSet.Builder columnsMappedBuilder = ImmutableBitSet.builder();
+     Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION, child.getRowType().getFieldCount(),
+         project.getRowType().getFieldCount());
+
+     for (Ord<RexNode> o : Ord.zip(project.getProjects())) {
+       if (o.e instanceof RexInputRef) {
+         int sIdx = ((RexInputRef) o.e).getIndex();
+         m.set(sIdx, o.i);
+         columnsMappedBuilder.set(sIdx);
+       }
+     }
+
+     // Go over childPullUpPredicates. If a predicate only contains columns in
+     // 'columnsMapped' construct a new predicate based on mapping.
+     final ImmutableBitSet columnsMapped = columnsMappedBuilder.build();
+     for (RexNode r : childInfo.pulledUpPredicates) {
+       ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(r);
+       if (columnsMapped.contains(rCols)) {
+         r = r.accept(new RexPermuteInputsShuttle(m, child));
+         projectPullUpPredicates.add(r);
+       }
+     }
+
+     // Project can also generate constants. We need to include them.
+     for (Ord<RexNode> expr : Ord.zip(project.getProjects())) {
+       if (RexLiteral.isNullLiteral(expr.e)) {
+         projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL,
+             rexBuilder.makeInputRef(project, expr.i)));
+       } else if (expr.e instanceof RexLiteral) {
+         final RexLiteral literal = (RexLiteral) expr.e;
+         projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+             rexBuilder.makeInputRef(project, expr.i), literal));
+       } else if (expr.e instanceof RexCall && HiveCalciteUtil.isDeterministicFuncOnLiterals(expr.e)) {
+         //TODO: Move this to calcite
+         projectPullUpPredicates.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+             rexBuilder.makeInputRef(project, expr.i), expr.e));
+       }
+     }
+     return RelOptPredicateList.of(projectPullUpPredicates);
+   }
+
+   /**
+    * Infers predicates for a {@link org.apache.calcite.rel.core.Join}.
+    *
+    * <p>The predicates pulled up from each input are AND-ed into a single
+    * expression per side and handed to
+    * {@link HiveJoinConditionBasedPredicateInference}, which is asked for its
+    * result with equality inference disabled.
+    *
+    * @param join the join whose predicates are requested
+    * @return the predicate list produced by the inference helper
+    */
+   @Override
+   public RelOptPredicateList getPredicates(Join join) {
+     final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+
+     final RelOptPredicateList leftPulledUp =
+         RelMetadataQuery.getPulledUpPredicates(join.getInput(0));
+     final RelOptPredicateList rightPulledUp =
+         RelMetadataQuery.getPulledUpPredicates(join.getInput(1));
+
+     final RexNode leftConjunction =
+         RexUtil.composeConjunction(rexBuilder, leftPulledUp.pulledUpPredicates, false);
+     final RexNode rightConjunction =
+         RexUtil.composeConjunction(rexBuilder, rightPulledUp.pulledUpPredicates, false);
+
+     final HiveJoinConditionBasedPredicateInference inference =
+         new HiveJoinConditionBasedPredicateInference(join, leftConjunction, rightConjunction);
+     return inference.inferPredicates(false);
+   }
+
+ /**
+ * Utility to infer predicates from one side of the join that apply on the
+ * other side.
+ *
+ * <p>Contract is:<ul>
+ *
+ * <li>initialize with a {@link org.apache.calcite.rel.core.Join} and
+ * optional predicates applicable on its left and right subtrees.
+ *
+ * <li>you can
+ * then ask it for equivalentPredicate(s) given a predicate.
+ *
+ * </ul>
+ *
+ * <p>So for:
+ * <ol>
+ * <li>'<code>R1(x) join R2(y) on x = y</code>' a call for
+ * equivalentPredicates on '<code>x > 7</code>' will return '
+ * <code>[y > 7]</code>'
+ * <li>'<code>R1(x) join R2(y) on x = y join R3(z) on y = z</code>' a call for
+ * equivalentPredicates on the second join '<code>x > 7</code>' will return '
+ * <code>[y > 7, z > 7]</code>'
+ * </ol>
+ */
+ static class HiveJoinConditionBasedPredicateInference {
+ final Join joinRel;
+ final boolean isSemiJoin;
+ final int nSysFields;
+ final int nFieldsLeft;
+ final int nFieldsRight;
+ final ImmutableBitSet leftFieldsBitSet;
+ final ImmutableBitSet rightFieldsBitSet;
+ final ImmutableBitSet allFieldsBitSet;
+ SortedMap<Integer, BitSet> equivalence;
+ final Map<String, ImmutableBitSet> exprFields;
+ final Set<String> allExprsDigests;
+ final Set<String> equalityPredicates;
+ final RexNode leftChildPredicates;
+ final RexNode rightChildPredicates;
+
+ /**
+ * Creates an inference helper for {@code joinRel}, treating it as a
+ * semi-join exactly when it is an instance of {@link SemiJoin}.
+ *
+ * @param joinRel the join to analyse
+ * @param lPreds predicates holding on the left input; may be null
+ * @param rPreds predicates holding on the right input; may be null
+ */
+ public HiveJoinConditionBasedPredicateInference(Join joinRel,
+ RexNode lPreds, RexNode rPreds) {
+ this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds);
+ }
+
+     /**
+      * Builds the inference state for {@code joinRel}.
+      *
+      * <p>The constructor
+      * <ol>
+      * <li>computes the field ranges of the left and right inputs within the
+      * join row (after any system fields);
+      * <li>shifts the non-null child predicates into those ranges and records,
+      * per conjunct, its digest and the fields it references;
+      * <li>seeds every field as equivalent to itself, lets
+      * {@link EquivalenceFinder} visit each conjunct of the join condition, and
+      * finally closes the resulting relation with {@code BitSets.closure}.
+      * </ol>
+      *
+      * @param joinRel the join to analyse
+      * @param isSemiJoin whether {@code joinRel} is to be treated as a semi-join
+      * @param lPreds predicates holding on the left input; may be null
+      * @param rPreds predicates holding on the right input; may be null
+      */
+     private HiveJoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin,
+         RexNode lPreds, RexNode rPreds) {
+       this.joinRel = joinRel;
+       this.isSemiJoin = isSemiJoin;
+       nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size();
+       nFieldsRight = joinRel.getRight().getRowType().getFieldList().size();
+       nSysFields = joinRel.getSystemFieldList().size();
+       leftFieldsBitSet = ImmutableBitSet.range(nSysFields,
+           nSysFields + nFieldsLeft);
+       rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft,
+           nSysFields + nFieldsLeft + nFieldsRight);
+       allFieldsBitSet = ImmutableBitSet.range(0,
+           nSysFields + nFieldsLeft + nFieldsRight);
+
+       exprFields = Maps.newHashMap();
+       allExprsDigests = new HashSet<String>();
+
+       if (lPreds == null) {
+         leftChildPredicates = null;
+       } else {
+         Mappings.TargetMapping leftMapping = Mappings.createShiftMapping(
+             nSysFields + nFieldsLeft, nSysFields, 0, nFieldsLeft);
+         leftChildPredicates = lPreds.accept(
+             new RexPermuteInputsShuttle(leftMapping, joinRel.getInput(0)));
+
+         for (RexNode r : RelOptUtil.conjunctions(leftChildPredicates)) {
+           exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r));
+           allExprsDigests.add(r.toString());
+         }
+       }
+       if (rPreds == null) {
+         rightChildPredicates = null;
+       } else {
+         Mappings.TargetMapping rightMapping = Mappings.createShiftMapping(
+             nSysFields + nFieldsLeft + nFieldsRight,
+             nSysFields + nFieldsLeft, 0, nFieldsRight);
+         rightChildPredicates = rPreds.accept(
+             new RexPermuteInputsShuttle(rightMapping, joinRel.getInput(1)));
+
+         for (RexNode r : RelOptUtil.conjunctions(rightChildPredicates)) {
+           exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r));
+           allExprsDigests.add(r.toString());
+         }
+       }
+
+       equivalence = Maps.newTreeMap();
+       equalityPredicates = new HashSet<String>();
+       for (int i = 0; i < nSysFields + nFieldsLeft + nFieldsRight; i++) {
+         equivalence.put(i, BitSets.of(i));
+       }
+
+       // Only process equivalences found in the join conditions. Processing
+       // Equivalences from the left or right side infer predicates that are
+       // already present in the Tree below the join.
+       RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
+       List<RexNode> exprs =
+           RelOptUtil.conjunctions(
+               compose(rexBuilder, ImmutableList.of(joinRel.getCondition())));
+
+       // Visit each conjunct directly; the visitor records equivalences as a
+       // side effect, so no result collection is needed.
+       final EquivalenceFinder eF = new EquivalenceFinder();
+       for (RexNode expr : exprs) {
+         expr.accept(eF);
+       }
+
+       equivalence = BitSets.closure(equivalence);
+     }
+
+     /**
+      * Infers predicates across the join and packages them with the predicates
+      * that can be pulled up. The strategy is sound but not complete.
+      *
+      * <ol>
+      * <li>INNER: predicates are inferred from both children over all fields.
+      * The pulled-up list is the left child predicates plus the inferred
+      * left-side predicates for a semi-join; otherwise it is both children's
+      * predicates, the join condition's conjuncts and all inferred predicates.
+      * <li>LEFT: predicates are inferred from the left child onto right-side
+      * fields only; the left child's conjuncts form the pulled-up list.
+      * <li>RIGHT: handled analogously, inferring from the right child onto
+      * left-side fields; the right child's conjuncts form the pulled-up list.
+      * <li>Any other join type: nothing is inferred and
+      * {@code RelOptPredicateList.EMPTY} is returned.
+      * </ol>
+      *
+      * @param includeEqualityInference when false, child conjuncts whose digest
+      *        was recorded as an equality predicate are not used for inference
+      * @return the pulled-up, left-inferred and right-inferred predicates
+      */
+     public RelOptPredicateList inferPredicates(
+         boolean includeEqualityInference) {
+       final JoinRelType joinType = joinRel.getJoinType();
+       final boolean innerJoin = joinType == JoinRelType.INNER;
+
+       final List<RexNode> inferredPredicates = new ArrayList<RexNode>();
+       // Work on a copy so the digests collected at construction stay untouched.
+       final Set<String> knownDigests = new HashSet<String>(this.allExprsDigests);
+
+       if (innerJoin || joinType == JoinRelType.LEFT) {
+         infer(leftChildPredicates, knownDigests, inferredPredicates,
+             includeEqualityInference,
+             innerJoin ? allFieldsBitSet : rightFieldsBitSet);
+       }
+       if (innerJoin || joinType == JoinRelType.RIGHT) {
+         infer(rightChildPredicates, knownDigests, inferredPredicates,
+             includeEqualityInference,
+             innerJoin ? allFieldsBitSet : leftFieldsBitSet);
+       }
+
+       // Shuttles that shift references from the join row back to each input.
+       final Mappings.TargetMapping rightShift = Mappings.createShiftMapping(
+           nSysFields + nFieldsLeft + nFieldsRight,
+           0, nSysFields + nFieldsLeft, nFieldsRight);
+       final HiveJoinRexPermuteInputsShuttle rightPermute =
+           new HiveJoinRexPermuteInputsShuttle(rightShift, joinRel);
+       final Mappings.TargetMapping leftShift = Mappings.createShiftMapping(
+           nSysFields + nFieldsLeft, 0, nSysFields, nFieldsLeft);
+       final HiveJoinRexPermuteInputsShuttle leftPermute =
+           new HiveJoinRexPermuteInputsShuttle(leftShift, joinRel);
+
+       // Split the inferred predicates by the side whose fields they use;
+       // predicates that span both sides go to neither list.
+       final List<RexNode> leftInferredPredicates = new ArrayList<RexNode>();
+       final List<RexNode> rightInferredPredicates = new ArrayList<RexNode>();
+       for (RexNode inferred : inferredPredicates) {
+         final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(inferred);
+         if (leftFieldsBitSet.contains(usedFields)) {
+           leftInferredPredicates.add(inferred.accept(leftPermute));
+         } else if (rightFieldsBitSet.contains(usedFields)) {
+           rightInferredPredicates.add(inferred.accept(rightPermute));
+         }
+       }
+
+       switch (joinType) {
+       case INNER: {
+         final Iterable<RexNode> pulledUpPredicates;
+         if (isSemiJoin) {
+           pulledUpPredicates = Iterables.concat(
+               RelOptUtil.conjunctions(leftChildPredicates),
+               leftInferredPredicates);
+         } else {
+           pulledUpPredicates = Iterables.concat(
+               RelOptUtil.conjunctions(leftChildPredicates),
+               RelOptUtil.conjunctions(rightChildPredicates),
+               RelOptUtil.conjunctions(joinRel.getCondition()),
+               inferredPredicates);
+         }
+         return RelOptPredicateList.of(pulledUpPredicates,
+             leftInferredPredicates, rightInferredPredicates);
+       }
+       case LEFT:
+         return RelOptPredicateList.of(
+             RelOptUtil.conjunctions(leftChildPredicates),
+             leftInferredPredicates, rightInferredPredicates);
+       case RIGHT:
+         return RelOptPredicateList.of(
+             RelOptUtil.conjunctions(rightChildPredicates),
+             inferredPredicates, EMPTY_LIST);
+       default:
+         assert inferredPredicates.isEmpty();
+         return RelOptPredicateList.EMPTY;
+       }
+     }
+
+ /**
+ * Returns the left child predicates as stored by the constructor (shifted
+ * through the left mapping), or null if none were supplied.
+ */
+ public RexNode left() {
+ return leftChildPredicates;
+ }
+
+ /**
+ * Returns the right child predicates as stored by the constructor (shifted
+ * through the right mapping), or null if none were supplied.
+ */
+ public RexNode right() {
+ return rightChildPredicates;
+ }
+
+     /**
+      * Derives new predicates from the conjuncts of {@code predicates} by
+      * substituting equivalent columns, appending each accepted result to
+      * {@code inferedPredicates}.
+      *
+      * <p>A rewritten conjunct is accepted only if it references nothing outside
+      * {@code inferringFields}, its digest is not yet in
+      * {@code allExprsDigests}, and it is not always true. Accepted digests are
+      * added to {@code allExprsDigests}.
+      *
+      * @param predicates conjunction to infer from
+      * @param allExprsDigests digests already known; updated in place
+      * @param inferedPredicates output list; updated in place
+      * @param includeEqualityInference when false, conjuncts recorded in
+      *        {@code equalityPredicates} are skipped
+      * @param inferringFields fields an inferred predicate may reference
+      */
+     private void infer(RexNode predicates, Set<String> allExprsDigests,
+         List<RexNode> inferedPredicates, boolean includeEqualityInference,
+         ImmutableBitSet inferringFields) {
+       for (RexNode conjunct : RelOptUtil.conjunctions(predicates)) {
+         final boolean skippedEquality = !includeEqualityInference
+             && equalityPredicates.contains(conjunct.toString());
+         if (skippedEquality) {
+           continue;
+         }
+         for (Mapping substitution : mappings(conjunct)) {
+           final RexNode candidate = conjunct.accept(
+               new RexPermuteInputsShuttle(substitution, joinRel.getInput(0),
+                   joinRel.getInput(1)));
+           if (!inferringFields.contains(RelOptUtil.InputFinder.bits(candidate))) {
+             continue;
+           }
+           final String digest = candidate.toString();
+           if (allExprsDigests.contains(digest) || isAlwaysTrue(candidate)) {
+             continue;
+           }
+           inferedPredicates.add(candidate);
+           allExprsDigests.add(digest);
+         }
+       }
+     }
+
+     /**
+      * Returns a lazily evaluated view of the column substitutions available
+      * for {@code predicate}.
+      *
+      * <p>The referenced fields are looked up in {@code exprFields} by the
+      * predicate's digest each time an iterator is requested; the predicate
+      * must therefore have been registered there (the constructor does this
+      * for every child conjunct).
+      */
+     Iterable<Mapping> mappings(final RexNode predicate) {
+       return new Iterable<Mapping>() {
+         @Override
+         public Iterator<Mapping> iterator() {
+           final ImmutableBitSet referenced = exprFields.get(predicate.toString());
+           if (referenced.cardinality() != 0) {
+             return new ExprsItr(referenced);
+           }
+           // No field references, hence nothing to substitute.
+           return Iterators.emptyIterator();
+         }
+       };
+     }
+
+     /**
+      * Records that positions {@code p1} and {@code p2} are equivalent by
+      * setting each one's bit in the other's entry of {@code equivalence}.
+      */
+     private void equivalent(int p1, int p2) {
+       equivalence.get(p1).set(p2);
+       equivalence.get(p2).set(p1);
+     }
+
+     /**
+      * AND-s together the non-null members of {@code exprs}.
+      *
+      * @param rexBuilder builder used to create the conjunction
+      * @param exprs expressions to combine; null elements are ignored
+      * @return the result of {@code RexUtil.composeConjunction} over the
+      *         remaining expressions
+      */
+     RexNode compose(RexBuilder rexBuilder, Iterable<RexNode> exprs) {
+       final Predicate1<RexNode> isPresent = new Predicate1<RexNode>() {
+         public boolean apply(RexNode candidate) {
+           return candidate != null;
+         }
+       };
+       final Iterable<RexNode> presentExprs = Linq4j.asEnumerable(exprs).where(isPresent);
+       return RexUtil.composeConjunction(rexBuilder, presentExprs, false);
+     }
+
+     /**
+      * Visitor that records equalities between two positions, i.e. expressions
+      * of the form 'col_x = col_y'.
+      *
+      * <p>For an {@code EQUALS} call whose two operands both resolve to a
+      * position (via {@code pos}, which signals "no position" with -1), the
+      * positions are marked equivalent and the call's digest is added to
+      * {@code equalityPredicates}. Operands of a call are not visited further.
+      */
+     class EquivalenceFinder extends RexVisitorImpl<Void> {
+       protected EquivalenceFinder() {
+         super(true);
+       }
+
+       @Override public Void visitCall(RexCall call) {
+         if (call.getOperator().getKind() != SqlKind.EQUALS) {
+           return null;
+         }
+         final int leftPosition = pos(call.getOperands().get(0));
+         final int rightPosition = pos(call.getOperands().get(1));
+         if (leftPosition == -1 || rightPosition == -1) {
+           return null;
+         }
+         HiveJoinConditionBasedPredicateInference.this.equivalent(leftPosition, rightPosition);
+         HiveJoinConditionBasedPredicateInference.this.equalityPredicates
+             .add(call.toString());
+         return null;
+       }
+     }
+
+ /**
+  * Given an expression returns all the possible substitutions.
+  *
+  * <p>For example, for an expression 'a + b + c' and the following
+  * equivalences: <pre>
+  * a : {a, b}
+  * b : {a, b}
+  * c : {c, e}
+  * </pre>
+  *
+  * <p>The following Mappings will be returned:
+  * <pre>
+  * {a->a, b->a, c->c}
+  * {a->a, b->a, c->e}
+  * {a->a, b->b, c->c}
+  * {a->a, b->b, c->e}
+  * {a->b, b->a, c->c}
+  * {a->b, b->a, c->e}
+  * {a->b, b->b, c->c}
+  * {a->b, b->b, c->e}
+  * </pre>
+  *
+  * <p>which imply the following inferences:
+  * <pre>
+  * a + a + c
+  * a + a + e
+  * a + b + c
+  * a + b + e
+  * b + a + c
+  * b + a + e
+  * b + b + c
+  * b + b + e
+  * </pre>
+  *
+  * <p>The mappings are not necessarily produced in the order listed above,
+  * and the same mapping may be produced more than once, because a column is
+  * not rewound to its first equivalent when an earlier column advances.
+  *
+  * <p>A single {@link Mapping} instance is reused: the object returned by
+  * {@link #next()} is modified in place when the iterator advances, so
+  * callers must finish with it before asking for the next one.
+  *
+  * <p>{@link #hasNext()} is idempotent, and {@link #next()} throws
+  * {@link java.util.NoSuchElementException} once the iteration is exhausted.
+  */
+ class ExprsItr implements Iterator<Mapping> {
+   /** Field indexes used by the expression, in ascending order. */
+   final int[] columns;
+   /** For each entry of {@link #columns}, the set of equivalent fields. */
+   final BitSet[] columnSets;
+   /** For each column, the bit index from which the next search starts. */
+   final int[] iterationIdx;
+   /** Mapping to hand out next; null before the first advance and when exhausted. */
+   Mapping nextMapping;
+   /** True until the first mapping has been initialized. */
+   boolean firstCall;
+   /** True when {@link #nextMapping} is already positioned for the upcoming {@link #next()}. */
+   private boolean advanced;
+
+   ExprsItr(ImmutableBitSet fields) {
+     nextMapping = null;
+     columns = new int[fields.cardinality()];
+     columnSets = new BitSet[fields.cardinality()];
+     iterationIdx = new int[fields.cardinality()];
+     for (int j = 0, i = fields.nextSetBit(0); i >= 0; i = fields
+         .nextSetBit(i + 1), j++) {
+       columns[j] = i;
+       columnSets[j] = equivalence.get(i);
+       iterationIdx[j] = 0;
+     }
+     firstCall = true;
+     advanced = false;
+   }
+
+   public boolean hasNext() {
+     if (!advanced) {
+       advance();
+       advanced = true;
+     }
+     return nextMapping != null;
+   }
+
+   public Mapping next() {
+     if (!hasNext()) {
+       throw new java.util.NoSuchElementException();
+     }
+     // Consume the current position; the following hasNext() moves forward.
+     advanced = false;
+     return nextMapping;
+   }
+
+   public void remove() {
+     throw new UnsupportedOperationException();
+   }
+
+   /** Moves to the next mapping; does nothing once the iteration is exhausted. */
+   private void advance() {
+     if (firstCall) {
+       initializeMapping();
+       firstCall = false;
+     } else if (nextMapping != null) {
+       if (iterationIdx.length == 0) {
+         // No columns to vary: the single (empty) mapping was already returned.
+         nextMapping = null;
+       } else {
+         computeNextMapping(iterationIdx.length - 1);
+       }
+     }
+   }
+
+   /**
+    * Advances the column at {@code level} to its next equivalent. When that
+    * column has none left, its search position is reset and the column to
+    * its left is advanced instead; running out at level 0 ends the iteration.
+    */
+   private void computeNextMapping(int level) {
+     int t = columnSets[level].nextSetBit(iterationIdx[level]);
+     if (t < 0) {
+       if (level == 0) {
+         nextMapping = null;
+       } else {
+         iterationIdx[level] = 0;
+         computeNextMapping(level - 1);
+       }
+     } else {
+       nextMapping.set(columns[level], t);
+       iterationIdx[level] = t + 1;
+     }
+   }
+
+   /**
+    * Creates the mapping and points every column at its first equivalent.
+    * Leaves {@link #nextMapping} null if some column has no equivalent.
+    */
+   private void initializeMapping() {
+     nextMapping = Mappings.create(MappingType.PARTIAL_FUNCTION,
+         nSysFields + nFieldsLeft + nFieldsRight,
+         nSysFields + nFieldsLeft + nFieldsRight);
+     for (int i = 0; i < columnSets.length; i++) {
+       BitSet c = columnSets[i];
+       int t = c.nextSetBit(iterationIdx[i]);
+       if (t < 0) {
+         nextMapping = null;
+         return;
+       }
+       nextMapping.set(columns[i], t);
+       iterationIdx[i] = t + 1;
+     }
+   }
+ }
+
+ /**
+  * Returns the input index of {@code expr} when it is a {@link RexInputRef},
+  * or -1 for any other kind of expression.
+  */
+ private int pos(RexNode expr) {
+   return expr instanceof RexInputRef ? ((RexInputRef) expr).getIndex() : -1;
+ }
+
+ private boolean isAlwaysTrue(RexNode predicate) {
+ if (predicate instanceof RexCall) {
+ RexCall c = (RexCall) predicate;
+ if (c.getOperator().getKind() == SqlKind.EQUALS) {
+ int lPos = pos(c.getOperands().get(0));
+ int rPos = pos(c.getOperands().get(1));
+ return lPos != -1 && lPos == rPos;
+ }
+ }
+ return predicate.isAlwaysTrue();
+ }
+ }
+
+ /**
+  * Shuttle which applies a permutation to its input fields.
+  *
+  * <p>Input references are re-pointed through the supplied mapping, and
+  * every other rewritten call is passed through
+  * {@code HiveCalciteUtil.getTypeSafePred} together with the input's cluster
+  * and row type.
+  *
+  * @see RexPermutationShuttle
+  * @see RexUtil#apply(org.apache.calcite.util.mapping.Mappings.TargetMapping, RexNode)
+  */
+ public static class HiveJoinRexPermuteInputsShuttle extends RexShuttle {
+   //~ Instance fields --------------------------------------------------------
+
+   private final Mappings.TargetMapping mapping;
+   private final ImmutableList<RelDataTypeField> fields;
+   private final RelOptCluster cluster;
+   private final RelDataType rType;
+
+   //~ Constructors -----------------------------------------------------------
+
+   private HiveJoinRexPermuteInputsShuttle(
+       Mappings.TargetMapping mapping,
+       RelNode input) {
+     final RelDataType inputRowType = input.getRowType();
+     this.mapping = mapping;
+     this.cluster = input.getCluster();
+     this.rType = inputRowType;
+     this.fields = ImmutableList.copyOf(inputRowType.getFieldList());
+   }
+
+   //~ Methods ----------------------------------------------------------------
+
+   /** Concatenates the row-type fields of all {@code inputs}, in order. */
+   private static ImmutableList<RelDataTypeField> fields(RelNode[] inputs) {
+     final ImmutableList.Builder<RelDataTypeField> allFields =
+         ImmutableList.builder();
+     for (RelNode rel : inputs) {
+       allFields.addAll(rel.getRowType().getFieldList());
+     }
+     return allFields.build();
+   }
+
+   /**
+    * Replaces an input reference by one pointing at the mapped target index,
+    * typed with the field found at the original index.
+    */
+   @Override public RexNode visitInputRef(RexInputRef local) {
+     final int sourceIdx = local.getIndex();
+     final int targetIdx = mapping.getTarget(sourceIdx);
+     return new RexInputRef(targetIdx, fields.get(sourceIdx).getType());
+   }
+
+   /**
+    * Turns a {@code RexBuilder.GET_OPERATOR} call whose second operand names
+    * a known field into a direct reference to that field; every other call
+    * is rewritten by the superclass and then made type safe.
+    */
+   @Override public RexNode visitCall(RexCall call) {
+     if (call.getOperator() == RexBuilder.GET_OPERATOR) {
+       final RexLiteral nameLiteral = (RexLiteral) call.getOperands().get(1);
+       final int fieldIdx = lookup(fields, (String) nameLiteral.getValue2());
+       if (fieldIdx >= 0) {
+         return RexInputRef.of(fieldIdx, fields);
+       }
+     }
+     final RexNode visited = super.visitCall(call);
+     return HiveCalciteUtil.getTypeSafePred(cluster, visited, rType);
+   }
+
+   /** Returns the position of the first field called {@code name}, or -1. */
+   private static int lookup(List<RelDataTypeField> fields, String name) {
+     int idx = 0;
+     for (RelDataTypeField candidate : fields) {
+       if (candidate.getName().equals(name)) {
+         return idx;
+       }
+       idx++;
+     }
+     return -1;
+   }
+ }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index bcb9ea7..37249f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -202,6 +202,7 @@ public class SqlFunctionConverter {
          case IN:
          case BETWEEN:
          case ROW:
+ case IS_NOT_NULL:
            node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
            node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
            break;
@@ -314,7 +315,7 @@ public class SqlFunctionConverter {
        registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
        registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
        registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
-
+ registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL"));
      }

      private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index c1e314f..1d3a90a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -52,8 +52,12 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
  import org.apache.hadoop.hive.ql.plan.AggregationDesc;
  import org.apache.hadoop.hive.ql.plan.ColStatistics;
  import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
  import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  import org.apache.hadoop.hive.ql.plan.GroupByDesc;
  import org.apache.hadoop.hive.ql.plan.JoinDesc;
@@ -337,8 +341,9 @@ public class StatsRulesProcFactory {
            }
          } else if (udf instanceof GenericUDFOPNot) {
            newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
+ } else if (udf instanceof GenericUDFOPNotNull) {
+ return evaluateNotNullExpr(stats, genFunc);
          } else {
-
            // single predicate condition
            newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount);
          }
@@ -443,6 +448,58 @@ public class StatsRulesProcFactory {
        return numRows / 2;
      }

+ private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) {
+ long noOfNulls = getMaxNulls(parentStats, pred);
+ long parentCardinality = parentStats.getNumRows();
+ long newPredCardinality = parentCardinality;
+
+ if (parentCardinality > noOfNulls) {
+ newPredCardinality = parentCardinality - noOfNulls;
+ } else {
+ LOG.error("Invalid column stats: No of nulls > cardinality");
+ }
+
+ return newPredCardinality;
+ }
+
+ private long getMaxNulls(Statistics stats, ExprNodeDesc pred) {
+ long tmpNoNulls = 0;
+ long maxNoNulls = 0;
+
+ if (pred instanceof ExprNodeColumnDesc) {
+ ColStatistics cs = stats.getColumnStatisticsFromColName(((ExprNodeColumnDesc) pred)
+ .getColumn());
+ if (cs != null) {
+ tmpNoNulls = cs.getNumNulls();
+ }
+ } else if (pred instanceof ExprNodeGenericFuncDesc || pred instanceof ExprNodeColumnListDesc) {
+ long noNullsOfChild = 0;
+ for (ExprNodeDesc childExpr : pred.getChildren()) {
+ noNullsOfChild = getMaxNulls(stats, childExpr);
+ if (noNullsOfChild > tmpNoNulls) {
+ tmpNoNulls = noNullsOfChild;
+ }
+ }
+ } else if (pred instanceof ExprNodeConstantDesc) {
+ if (ExprNodeDescUtils.isNullConstant(pred)) {
+ tmpNoNulls = stats.getNumRows();
+ } else {
+ tmpNoNulls = 0;
+ }
+ } else if (pred instanceof ExprNodeDynamicListDesc) {
+ tmpNoNulls = 0;
+ } else if (pred instanceof ExprNodeFieldDesc) {
+ // TODO Confirm this is safe
+ tmpNoNulls = getMaxNulls(stats, ((ExprNodeFieldDesc) pred).getDesc());
+ }
+
+ if (tmpNoNulls > maxNoNulls) {
+ maxNoNulls = tmpNoNulls;
+ }
+
+ return maxNoNulls;
+ }
+
      private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
          AnnotateStatsProcCtx aspCtx, List<String> neededCols,
          FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
@@ -525,8 +582,7 @@ public class StatsRulesProcFactory {
udf instanceof GenericUDFOPLessThan) {
            return numRows / 3;
          } else if (udf instanceof GenericUDFOPNotNull) {
- long newNumRows = evaluateColEqualsNullExpr(stats, genFunc);
- return stats.getNumRows() - newNumRows;
+ return evaluateNotNullExpr(stats, genFunc);
          } else if (udf instanceof GenericUDFOPNull) {
            return evaluateColEqualsNullExpr(stats, genFunc);
          } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c005b1a..ac50673 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -62,13 +62,11 @@ import org.apache.calcite.rel.core.JoinRelType;
  import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
  import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
  import org.apache.calcite.rel.metadata.RelMetadataProvider;
-import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
-import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
+import org.apache.calcite.rel.rules.FilterMergeRule;
  import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
  import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
  import org.apache.calcite.rel.rules.ProjectMergeRule;
  import org.apache.calcite.rel.rules.ProjectRemoveRule;
-import org.apache.calcite.rel.rules.ReduceExpressionsRule;
  import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule;
  import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule;
  import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule;
@@ -137,7 +135,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
  import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
@@ -818,7 +818,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
    private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
      private RelOptCluster cluster;
      private RelOptSchema relOptSchema;
- private final Map<String, PrunedPartitionList> partitionCache;
+ private final Map<String, PrunedPartitionList> partitionCache;

      // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
      // just last one.
@@ -918,11 +918,10 @@ public class CalcitePlanner extends SemanticAnalyzer {

        // 4. Run other optimizations that do not need stats
        calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(),
- HepMatchOrder.BOTTOM_UP, ReduceExpressionsRule.JOIN_INSTANCE,
- ReduceExpressionsRule.FILTER_INSTANCE, ReduceExpressionsRule.PROJECT_INSTANCE,
+ HepMatchOrder.BOTTOM_UP,
                ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE,
                new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY),
- HiveAggregateProjectMergeRule.INSTANCE);
+ HiveAggregateProjectMergeRule.INSTANCE, HiveJoinCommuteRule.INSTANCE);

        // 5. Run aggregate-join transpose (cost based)
        // If it failed because of missing stats, we continue with
@@ -955,10 +954,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
          }
        }

- // 6. Run rule to try to remove projects on top of join operators
- calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(),
- HepMatchOrder.BOTTOM_UP, HiveJoinCommuteRule.INSTANCE);
-
        // 7. Run rule to fix windowing issue when it is done over
        // aggregation columns (HIVE-10627)
        if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) {
@@ -1014,12 +1009,11 @@ public class CalcitePlanner extends SemanticAnalyzer {
       * @return
       */
      private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider mdProvider) {
-
        // TODO: Decorelation of subquery should be done before attempting
        // Partition Pruning; otherwise Expression evaluation may try to execute
        // corelated sub query.

- //0. Distinct aggregate rewrite
+ //1. Distinct aggregate rewrite
        // Run this optimization early, since it is expanding the operator pipeline.
        if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
            conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
@@ -1029,7 +1023,40 @@ public class CalcitePlanner extends SemanticAnalyzer {
          basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE);
        }

- // 1. Push down limit through outer join
+ // 2. Try factoring out common filter elements & separating deterministic
+ // vs non-deterministic UDF. This needs to run before PPD so that PPD can
+ // add on-clauses for old style Join Syntax
+ // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or
+ // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1
+ basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY,
+ HivePreFilteringRule.INSTANCE);
+
+ // 3. PPD for old Join Syntax
+ // NOTE: PPD needs to run before adding not null filters in order to
+ // support old style join syntax (so that on-clauses will get filled up).
+ // TODO: Add the ReduceExpressionsRule instances (constant folding) to the
+ // rule set below once HIVE-11927 is fixed.
+ basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule(
+ Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
+ HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule(
+ HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN,
+ HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class,
+ HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule(
+ HiveFilter.DEFAULT_FILTER_FACTORY));
+
+ // TODO: Transitive inference, constant prop & Predicate push down has to
+ // do multiple passes till no more inference is left
+ // Currently doing so would result in a spin. Just checking for if inferred
+ // pred is present below may not be sufficient as inferred & pushed pred
+ // could have been mutated by constant folding/prop
+ // 4. Transitive inference for join on clauses
+ basePlan = hepPlan(basePlan, true, mdProvider, new HiveJoinPushTransitivePredicatesRule(
+ Join.class, HiveFilter.DEFAULT_FILTER_FACTORY));
+
+ // 5. Push down limit through outer join
+ // NOTE: We run this after PPD to support old style join syntax.
+ // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
+ // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10
        if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) {
          // This should be a cost based decision, but till we enable the extended cost
          // model, we will use the given value for the variable
@@ -1044,35 +1071,28 @@ public class CalcitePlanner extends SemanticAnalyzer {
              HiveProjectSortTransposeRule.INSTANCE);
        }

- // 2. Push Down Semi Joins
+ // 6. Add not null filters
+ basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
+
+ // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference
+ // TODO: Add the ReduceExpressionsRule instances (constant folding) to the
+ // rule set below once HIVE-11927 is fixed.
+ basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule(
+ Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
+ HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule(
+ HiveFilter.DEFAULT_FILTER_FACTORY), HiveFilterJoinRule.JOIN,
+ HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class,
+ HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class), new FilterMergeRule(
+ HiveFilter.DEFAULT_FILTER_FACTORY));
+
+ // 8. Push Down Semi Joins
        basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE,
            SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);

- // 3. Add not null filters
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
- basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE);
- }
-
- // 4. Constant propagation, common filter extraction, and PPD
- basePlan = hepPlan(basePlan, true, mdProvider,
- ReduceExpressionsRule.PROJECT_INSTANCE,
- ReduceExpressionsRule.FILTER_INSTANCE,
- ReduceExpressionsRule.JOIN_INSTANCE,
- HivePreFilteringRule.INSTANCE,
- new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
- HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY),
- new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY),
- HiveFilterJoinRule.JOIN,
- HiveFilterJoinRule.FILTER_ON_JOIN,
- new FilterAggregateTransposeRule(Filter.class,
- HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class));
-
- // 5. Transitive inference & Partition Pruning
- basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule(
- Join.class, HiveFilter.DEFAULT_FILTER_FACTORY),
- new HivePartitionPruneRule(conf));
-
- // 6. Projection Pruning
+ // 9. Apply Partition Pruning
+ basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf));
+
+ // 10. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP)
        HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
            cluster, HiveProject.DEFAULT_PROJECT_FACTORY,
            HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
@@ -1080,11 +1100,18 @@ public class CalcitePlanner extends SemanticAnalyzer {
            HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
        basePlan = fieldTrimmer.trim(basePlan);

- // 7. Rerun PPD through Project as column pruning would have introduced DT
- // above scans
- basePlan = hepPlan(basePlan, true, mdProvider,
- new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
- HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY));
+
+ // 11. Merge Project-Project if possible
+ basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true,
+ HiveProject.DEFAULT_PROJECT_FACTORY));
+
+ // 12. Rerun PPD through Project as column pruning would have introduced
+ // DT above scans. By pushing the filter just above TS, Hive can push it into
+ // storage (in case there are filters on non-partition cols). This only
+ // matches FIL-PROJ-TS.
+ basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTSTransposeRule(
+ Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
+ HiveProject.DEFAULT_PROJECT_FACTORY, HiveTableScan.class));

        return basePlan;
      }

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index e291a48..0223038 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -487,6 +487,14 @@ public class ExprNodeDescUtils {
      return true;
    }

+ /**
+  * Tells whether {@code value} is a constant expression whose value is
+  * {@code null}.
+  *
+  * @param value expression to inspect
+  * @return true only for an {@link ExprNodeConstantDesc} holding a null value
+  */
+ public static boolean isNullConstant(ExprNodeDesc value) {
+   return value instanceof ExprNodeConstantDesc
+       && ((ExprNodeConstantDesc) value).getValue() == null;
+ }
+
    public static PrimitiveTypeInfo deriveMinArgumentCast(
        ExprNodeDesc childExpr, TypeInfo targetType) {
      assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType;

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q
index 799a66b..c017172 100644
--- a/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q
+++ b/ql/src/test/queries/clientpositive/special_character_in_tabnames_1.q
@@ -94,7 +94,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -152,7 +152,7 @@ select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value fro



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -280,7 +280,7 @@ select * from (select q, b, `//cbo_t2`.p, `c/b/o_t1`.c, `cbo_/t3////`.c_int from



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -310,7 +310,7 @@ select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum

  select `cbo_/t3////`.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from `c/b/o_t1` where (`c/b/o_t1`.c_int + 1 >= 0) and (`c/b/o_t1`.c_int > 0 or `c/b/o_t1`.c_float >= 0) group by c_float, `c/b/o_t1`.c_int, key having `c/b/o_t1`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc limit 5) `c/b/o_t1` left outer join (select key as p, c_int+1 as q, sum(c_int) as r from `//cbo_t2` where (`//cbo_t2`.c_int + 1 >= 0) and (`//cbo_t2`.c_int > 0 or `//cbo_t2`.c_float >= 0) group by c_float, `//cbo_t2`.c_int, key having `//cbo_t2`.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 limit 5) `//cbo_t2` on `c/b/o_t1`.a=p left outer join `cbo_/t3////` on `c/b/o_t1`.a=key where (b + `//cbo_t2`.q >= 0) and (b > 0 or c_int >= 0) group by `cbo_/t3////`.c_int, c having `cbo_/t3////`.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by `cbo_/t3////`.c_int % c asc, `cbo_/t3////`.c_int, c desc li
  mit 5;

-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -342,7 +342,7 @@ select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -452,7 +452,7 @@ select key from `c/b/o_t1` where c_int = -6 or c_int = +6;

  select count(`c/b/o_t1`.dt) from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.dt = `//cbo_t2`.dt where `c/b/o_t1`.dt = '2014' ;

-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -470,7 +470,7 @@ select `c/b/o_t1`.value from `c/b/o_t1` join `//cbo_t2` on `c/b/o_t1`.key = `//c



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -602,7 +602,7 @@ from (select b.key, count(*)



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -712,7 +712,7 @@ having p_name in



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -872,7 +872,7 @@ having b.p_mfgr not in



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -910,7 +910,7 @@ select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_int o

  select count(distinct c_int) as a, avg(c_float) from `c/b/o_t1` group by c_float, c_int order by a;

-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -936,7 +936,7 @@ select r2.key from (select key, c_int from (select key, c_int from `c/b/o_t1` un



-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;

@@ -1026,7 +1026,7 @@ drop view v3;

  drop view v4;

-set hive.cbo.enable=true;
+set hive.cbo.enable=false;

  set hive.exec.check.crossproducts=false;


http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
index cdecad1..e2e589c 100644
--- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
+++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
@@ -107,32 +107,32 @@ STAGE PLANS:
              Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
            TableScan
              alias: b
              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
        Reduce Operator Tree:
          Join Operator
@@ -142,14 +142,14 @@ STAGE PLANS:
              0 _col0 (type: int)
              1 _col0 (type: int)
            outputColumnNames: _col0, _col1, _col4
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string)
              outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -190,7 +190,7 @@ STAGE PLANS:
              Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE
                HashTable Sink Operator
                  keys:
                    0 key (type: int)
@@ -204,7 +204,7 @@ STAGE PLANS:
              Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -212,14 +212,14 @@ STAGE PLANS:
                    0 key (type: int)
                    1 key (type: int)
                  outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
                    outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
- Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.TextInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
index b2a7d89..f860011 100644
--- a/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
+++ b/ql/src/test/results/clientnegative/sortmerge_mapjoin_mismatch_1.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
                HashTable Sink Operator
                  keys:
                    0 key (type: int)
@@ -91,7 +91,7 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -99,14 +99,14 @@ STAGE PLANS:
                    0 key (type: int)
                    1 key (type: int)
                  outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
- Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.TextInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
index 216b037..c6a3567 100644
--- a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
+++ b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
@@ -86,24 +86,24 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: ((key + 1) is not null and (key < 100)) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: (key + 1) (type: double)
                  sort order: +
                  Map-reduce partition columns: (key + 1) (type: double)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                  value expressions: key (type: string), value (type: string)
            TableScan
              alias: b
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: UDFToDouble(key) is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: UDFToDouble(key) (type: double)
                  sort order: +
                  Map-reduce partition columns: UDFToDouble(key) (type: double)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  value expressions: key (type: string), value (type: string)
        Reduce Operator Tree:
          Join Operator
@@ -113,13 +113,13 @@ STAGE PLANS:
              0 (key + 1) (type: double)
              1 UDFToDouble(key) (type: double)
            outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: 2 (type: int), concat(_col0, _col1, _col5, _col6) (type: string), concat(_col0, _col1) (type: string), concat(_col5, _col6) (type: string), concat(_col0, _col1, _col5) (type: string), concat(_col0, _col5, _col6) (type: string)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              UDTF Operator
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                function name: stack
                Limit
                  Number of rows: 10

http://git-wip-us.apache.org/repos/asf/hive/blob/d8ee05ae/ql/src/test/results/clientpositive/ambiguous_col.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ambiguous_col.q.out b/ql/src/test/results/clientpositive/ambiguous_col.q.out
index 7f04e89..1d1d6af 100644
--- a/ql/src/test/results/clientpositive/ambiguous_col.q.out
+++ b/ql/src/test/results/clientpositive/ambiguous_col.q.out
@@ -17,32 +17,32 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string), value (type: string)
                  outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
            TableScan
              alias: src1
              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
        Reduce Operator Tree:
          Join Operator
            condition map:
@@ -51,14 +51,14 @@ STAGE PLANS:
              0 _col0 (type: string)
              1 _col0 (type: string)
            outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string)
              outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -136,31 +136,31 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            TableScan
              alias: src1
              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
        Reduce Operator Tree:
          Join Operator
            condition map:
@@ -169,14 +169,14 @@ STAGE PLANS:
              0 _col0 (type: string)
              1 _col0 (type: string)
            outputColumnNames: _col0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string), _col0 (type: string)
              outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -254,31 +254,31 @@ STAGE PLANS:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            TableScan
              alias: src1
              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: key (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
        Reduce Operator Tree:
          Join Operator
            condition map:
@@ -287,14 +287,14 @@ STAGE PLANS:
              0 _col0 (type: string)
              1 _col0 (type: string)
            outputColumnNames: _col0
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: _col0 (type: string), _col0 (type: string)
              outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
                compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                table:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Search Discussions

Discussion Posts

Previous

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 88 of 102 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedDec 12, '15 at 6:54a
activeDec 15, '15 at 12:39a
posts102
users1
websitehive.apache.org

1 user in discussion

Jpullokk: 102 posts

People

Translate

site design / logo © 2021 Grokbase