FAQ
Author: namit
Date: Fri Oct 5 04:13:06 2012
New Revision: 1394358

URL: http://svn.apache.org/viewvc?rev=1394358&view=rev
Log:
HIVE-3514 Refactor Partition Pruner so that logic can be reused.
(Gang Tim Liu via namit)


Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java Fri Oct 5 04:13:06 2012
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+/**
+ * Expression processor factory for pruning. Each processor tries to
+ * convert the expression subtree into a pruning expression.
+ *
+ * It can be used for partition prunner and list bucketing pruner.
+ */
+public abstract class PrunerExpressionOperatorFactory {
+
+ /**
+ * If all children are candidates and refer only to one table alias then this
+ * expr is a candidate else it is not a candidate but its children could be
+ * final candidates.
+ */
+ public static class GenericFuncExprProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ ExprNodeDesc newfd = null;
+ ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+ boolean unknown = false;
+
+ if (FunctionRegistry.isOpAndOrNot(fd)) {
+ // do nothing because "And" and "Or" and "Not" supports null value
+ // evaluation
+ // NOTE: In the future all UDFs that treats null value as UNKNOWN (both
+ // in parameters and return
+ // values) should derive from a common base class UDFNullAsUnknown, so
+ // instead of listing the classes
+ // here we would test whether a class is derived from that base class.
+ // If All childs are null, set unknown to true
+ boolean isAllNull = true;
+ for (Object child : nodeOutputs) {
+ ExprNodeDesc child_nd = (ExprNodeDesc) child;
+ if (!(child_nd instanceof ExprNodeConstantDesc
+ && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
+ isAllNull = false;
+ }
+ }
+ unknown = isAllNull;
+ } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+ // If it's a non-deterministic UDF, set unknown to true
+ unknown = true;
+ } else {
+ // If any child is null, set unknown to true
+ for (Object child : nodeOutputs) {
+ ExprNodeDesc child_nd = (ExprNodeDesc) child;
+ if (child_nd instanceof ExprNodeConstantDesc
+ && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+ unknown = true;
+ }
+ }
+ }
+
+ if (unknown) {
+ newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
+ } else {
+ // Create the list of children
+ ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ for (Object child : nodeOutputs) {
+ children.add((ExprNodeDesc) child);
+ }
+ // Create a copy of the function descriptor
+ newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd.getGenericUDF(), children);
+ }
+
+ return newfd;
+ }
+
+ }
+
+ /**
+ * FieldExprProcessor.
+ *
+ */
+ public static class FieldExprProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
+ boolean unknown = false;
+ int idx = 0;
+ ExprNodeDesc left_nd = null;
+ for (Object child : nodeOutputs) {
+ ExprNodeDesc child_nd = (ExprNodeDesc) child;
+ if (child_nd instanceof ExprNodeConstantDesc
+ && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+ unknown = true;
+ }
+ left_nd = child_nd;
+ }
+
+ assert (idx == 0);
+
+ ExprNodeDesc newnd = null;
+ if (unknown) {
+ newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
+ } else {
+ newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd.getFieldName(),
+ fnd.getIsList());
+ }
+ return newnd;
+ }
+
+ }
+
+
+ /**
+ * Processor for column expressions.
+ */
+ public static abstract class ColumnExprProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ ExprNodeDesc newcd = null;
+ ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+ newcd = processColumnDesc(procCtx, cd);
+
+ return newcd;
+ }
+
+ /**
+ * Process column desc. It should be done by subclass.
+ *
+ * @param procCtx
+ * @param cd
+ * @return
+ */
+ protected abstract ExprNodeDesc processColumnDesc(NodeProcessorCtx procCtx,
+ ExprNodeColumnDesc cd);
+
+ }
+
+ /**
+ * Processor for constants and null expressions. For such expressions the
+ * processor simply clones the exprNodeDesc and returns it.
+ */
+ public static class DefaultExprProcessor implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ if (nd instanceof ExprNodeConstantDesc) {
+ return ((ExprNodeConstantDesc) nd).clone();
+ } else if (nd instanceof ExprNodeNullDesc) {
+ return ((ExprNodeNullDesc) nd).clone();
+ }
+
+ assert (false);
+ return null;
+ }
+ }
+
+ /**
+ * Instantiate default expression processor.
+ * @return
+ */
+ public static final NodeProcessor getDefaultExprProcessor() {
+ return new DefaultExprProcessor();
+ }
+
+ /**
+ * Instantiate generic function processor.
+ *
+ * @return
+ */
+ public static final NodeProcessor getGenericFuncProcessor() {
+ return new GenericFuncExprProcessor();
+ }
+
+ /**
+ * Instantiate field processor.
+ *
+ * @return
+ */
+ public static final NodeProcessor getFieldProcessor() {
+ return new FieldExprProcessor();
+ }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java Fri Oct 5 04:13:06 2012
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Operator factory for pruning processing of operator graph We find
+ * all the filter operators that appear just beneath the table scan operators.
+ * We then pass the filter to the pruner to construct a pruner for
+ * that table alias and store a mapping from the table scan operator to that
+ * pruner. We call that pruner later during plan generation.
+ *
+ * Create this class from org.apache.hadoop.hive.ql.optimizer.ppr.OpProcFactory
+ * so that in addition to ppr, other pruner like list bucketin pruner can use it.
+ */
+public abstract class PrunerOperatorFactory {
+
+ /**
+ * Determines the partition pruner for the filter. This is called only when
+ * the filter follows a table scan operator.
+ */
+ public static abstract class FilterPruner implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ FilterOperator fop = (FilterOperator) nd;
+ FilterOperator fop2 = null;
+
+ // The stack contains either ... TS, Filter or
+ // ... TS, Filter, Filter with the head of the stack being the rightmost
+ // symbol. So we just pop out the two elements from the top and if the
+ // second one of them is not a table scan then the operator on the top of
+ // the stack is the Table scan operator.
+ Node tmp = stack.pop();
+ Node tmp2 = stack.pop();
+ TableScanOperator top = null;
+ if (tmp2 instanceof TableScanOperator) {
+ top = (TableScanOperator) tmp2;
+ } else {
+ top = (TableScanOperator) stack.peek();
+ fop2 = (FilterOperator) tmp2;
+ }
+ stack.push(tmp2);
+ stack.push(tmp);
+
+ // If fop2 exists (i.e this is not the top level filter and fop2 is not
+ // a sampling filter then we ignore the current filter
+ if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
+ return null;
+ }
+
+ // ignore the predicate in case it is not a sampling predicate
+ if (fop.getConf().getIsSamplingPred()) {
+ return null;
+ }
+
+ generatePredicate(procCtx, fop, top);
+
+ return null;
+ }
+
+ /**
+ * Generate predicate.
+ *
+ * Subclass should implement the function. Please refer to {@link OpProcFactory.FilterPPR}
+ *
+ * @param procCtx
+ * @param fop
+ * @param top
+ * @throws SemanticException
+ * @throws UDFArgumentException
+ */
+ protected abstract void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+ TableScanOperator top) throws SemanticException, UDFArgumentException;
+
+ /**
+ * Add pruning predicate.
+ *
+ * @param opToPrunner
+ * @param top
+ * @param new_pruner_pred
+ * @throws UDFArgumentException
+ */
+ protected void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPrunner,
+ TableScanOperator top, ExprNodeDesc new_pruner_pred) throws UDFArgumentException {
+ ExprNodeDesc old_pruner_pred = opToPrunner.get(top);
+ ExprNodeDesc pruner_pred = null;
+ if (old_pruner_pred != null) {
+ // or the old_pruner_pred and the new_ppr_pred
+ pruner_pred = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("OR",
+ old_pruner_pred, new_pruner_pred);
+ } else {
+ pruner_pred = new_pruner_pred;
+ }
+
+ // Put the mapping from table scan operator to pruner_pred
+ opToPrunner.put(top, pruner_pred);
+
+ return;
+ }
+ }
+
+ /**
+ * Default processor which just merges its children.
+ */
+ public static class DefaultPruner implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ // Nothing needs to be done.
+ return null;
+ }
+ }
+
+ /**
+ * Instantiate default processor.
+ *
+ * It's not supposed to be overwritten.
+ *
+ * @return
+ */
+ public final static NodeProcessor getDefaultProc() {
+ return new DefaultPruner();
+ }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java Fri Oct 5 04:13:06 2012
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+/**
+ * General utility common functions for the Pruner to do optimization.
+ *
+ */
+public final class PrunerUtils {
+ private static Log LOG;
+
+ static {
+ LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.PrunerUtils");
+ }
+
+ private PrunerUtils() {
+ //prevent instantiation
+ }
+
+ /**
+ * Walk operator tree for pruner generation.
+ *
+ * @param pctx
+ * @param opWalkerCtx
+ * @param filterProc
+ * @param defaultProc
+ * @throws SemanticException
+ */
+ public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx,
+ NodeProcessor filterProc, NodeProcessor defaultProc) throws SemanticException {
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+
+ // Build regular expression for operator rule.
+ // "(TS%FIL%)|(TS%FIL%FIL%)"
+ String tsOprName = TableScanOperator.getOperatorName();
+ String filtOprName = FilterOperator.getOperatorName();
+
+ opRules.put(new RuleRegExp("R1", new StringBuilder().append("(").append(tsOprName).append("%")
+ .append(filtOprName).append("%)|(").append(tsOprName).append("%").append(filtOprName)
+ .append("%").append(filtOprName).append("%)").toString()), filterProc);
+
+ // The dispatcher fires the processor corresponding to the closest matching
+ // rule and passes the context along
+ Dispatcher disp = new DefaultRuleDispatcher(defaultProc, opRules, opWalkerCtx);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+
+ // Create a list of topop nodes
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pctx.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+ }
+
+ /**
+ * Walk expression tree for pruner generation.
+ *
+ * @param pred
+ * @param ctx
+ * @param colProc
+ * @param fieldProc
+ * @param genFuncProc
+ * @param defProc
+ * @return
+ * @throws SemanticException
+ */
+ public static Map<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx,
+ NodeProcessor colProc, NodeProcessor fieldProc, NodeProcessor genFuncProc,
+ NodeProcessor defProc)
+ throws SemanticException {
+ // create a walker which walks the tree in a DFS manner while maintaining
+ // the operator stack. The dispatcher
+ // generates the plan from the operator tree
+ Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+ exprRules.put(new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), colProc);
+ exprRules.put(new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), fieldProc);
+ exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName() + "%"),
+ genFuncProc);
+
+ // The dispatcher fires the processor corresponding to the closest matching
+ // rule and passes the context along
+ Dispatcher disp = new DefaultRuleDispatcher(defProc, exprRules, ctx);
+ GraphWalker egw = new DefaultGraphWalker(disp);
+
+ List<Node> startNodes = new ArrayList<Node>();
+ startNodes.add(pred);
+
+ HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+ egw.startWalking(startNodes, outputMap);
+ return outputMap;
+ }
+
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java Fri Oct 5 04:13:06 2012
@@ -18,54 +18,46 @@

package org.apache.hadoop.hive.ql.optimizer.ppr;

-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
import java.util.Map;
-import java.util.Stack;

-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.PrunerExpressionOperatorFactory;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;

/**
* Expression processor factory for partition pruning. Each processor tries to
* convert the expression subtree into a partition pruning expression. This
* expression is then used to figure out whether a particular partition should
* be scanned or not.
+ *
+ * * Refactor:
+ * Move main logic to PrunerExpressionOperatorFactory. ExprProcFactory extends it to reuse logic.
+ *
+ * Any other pruner can reuse it by creating a class extending from PrunerExpressionOperatorFactory.
+ *
+ * Only specific logic is in genPruner(..) which is in its own class like ExprProcFactory.
+ *
*/
-public final class ExprProcFactory {
+public final class ExprProcFactory extends PrunerExpressionOperatorFactory {

private ExprProcFactory() {
// prevent instantiation
}

/**
- * Processor for column expressions.
+ * Processor for ppr column expressions.
*/
- public static class ColumnExprProcessor implements NodeProcessor {
+ public static class PPRColumnExprProcessor extends ColumnExprProcessor {

@Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
-
- ExprNodeDesc newcd = null;
- ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+ protected ExprNodeDesc processColumnDesc(NodeProcessorCtx procCtx, ExprNodeColumnDesc cd) {
+ ExprNodeDesc newcd;
ExprProcCtx epc = (ExprProcCtx) procCtx;
if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias())
&& cd.getIsPartitionColOrVirtualCol()) {
@@ -74,154 +66,22 @@ public final class ExprProcFactory {
newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null);
epc.setHasNonPartCols(true);
}
-
return newcd;
}
-
}

/**
- * If all children are candidates and refer only to one table alias then this
- * expr is a candidate else it is not a candidate but its children could be
- * final candidates.
- */
- public static class GenericFuncExprProcessor implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
-
- ExprNodeDesc newfd = null;
- ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
-
- boolean unknown = false;
-
- if (FunctionRegistry.isOpAndOrNot(fd)) {
- // do nothing because "And" and "Or" and "Not" supports null value
- // evaluation
- // NOTE: In the future all UDFs that treats null value as UNKNOWN (both
- // in parameters and return
- // values) should derive from a common base class UDFNullAsUnknown, so
- // instead of listing the classes
- // here we would test whether a class is derived from that base class.
- // If All childs are null, set unknown to true
- boolean isAllNull = true;
- for (Object child : nodeOutputs) {
- ExprNodeDesc child_nd = (ExprNodeDesc) child;
- if (!(child_nd instanceof ExprNodeConstantDesc
- && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
- isAllNull = false;
- }
- }
- unknown = isAllNull;
- } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
- // If it's a non-deterministic UDF, set unknown to true
- unknown = true;
- } else {
- // If any child is null, set unknown to true
- for (Object child : nodeOutputs) {
- ExprNodeDesc child_nd = (ExprNodeDesc) child;
- if (child_nd instanceof ExprNodeConstantDesc
- && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
- unknown = true;
- }
- }
- }
-
- if (unknown) {
- newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
- } else {
- // Create the list of children
- ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
- for (Object child : nodeOutputs) {
- children.add((ExprNodeDesc) child);
- }
- // Create a copy of the function descriptor
- newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd
- .getGenericUDF(), children);
- }
-
- return newfd;
- }
-
- }
-
- /**
- * FieldExprProcessor.
+ * Instantiate column processor.
*
+ * @return
*/
- public static class FieldExprProcessor implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
-
- ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
- boolean unknown = false;
- int idx = 0;
- ExprNodeDesc left_nd = null;
- for (Object child : nodeOutputs) {
- ExprNodeDesc child_nd = (ExprNodeDesc) child;
- if (child_nd instanceof ExprNodeConstantDesc
- && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
- unknown = true;
- }
- left_nd = child_nd;
- }
-
- assert (idx == 0);
-
- ExprNodeDesc newnd = null;
- if (unknown) {
- newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
- } else {
- newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd
- .getFieldName(), fnd.getIsList());
- }
- return newnd;
- }
-
- }
-
- /**
- * Processor for constants and null expressions. For such expressions the
- * processor simply clones the exprNodeDesc and returns it.
- */
- public static class DefaultExprProcessor implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- if (nd instanceof ExprNodeConstantDesc) {
- return ((ExprNodeConstantDesc) nd).clone();
- } else if (nd instanceof ExprNodeNullDesc) {
- return ((ExprNodeNullDesc) nd).clone();
- }
-
- assert (false);
- return null;
- }
- }
-
- public static NodeProcessor getDefaultExprProcessor() {
- return new DefaultExprProcessor();
- }
-
- public static NodeProcessor getGenericFuncProcessor() {
- return new GenericFuncExprProcessor();
- }
-
- public static NodeProcessor getFieldProcessor() {
- return new FieldExprProcessor();
- }
-
public static NodeProcessor getColumnProcessor() {
- return new ColumnExprProcessor();
+ return new PPRColumnExprProcessor();
}

/**
* Generates the partition pruner for the expression tree.
- *
+ *
* @param tabAlias
* The table alias of the partition table that is being considered
* for pruning
@@ -237,30 +97,10 @@ public final class ExprProcFactory {
// Create the walker, the rules dispatcher and the context.
ExprProcCtx pprCtx = new ExprProcCtx(tabAlias);

- // create a walker which walks the tree in a DFS manner while maintaining
- // the operator stack. The dispatcher
- // generates the plan from the operator tree
- Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
- exprRules.put(
- new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
- getColumnProcessor());
- exprRules.put(
- new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
- getFieldProcessor());
- exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName()
- + "%"), getGenericFuncProcessor());
-
- // The dispatcher fires the processor corresponding to the closest matching
- // rule and passes the context along
- Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
- exprRules, pprCtx);
- GraphWalker egw = new DefaultGraphWalker(disp);
-
- List<Node> startNodes = new ArrayList<Node>();
- startNodes.add(pred);
+ /* Move common logic to PrunerUtils.walkExprTree(...) so that it can be reused. */
+ Map<Node, Object> outputMap = PrunerUtils.walkExprTree(pred, pprCtx, getColumnProcessor(),
+ getFieldProcessor(), getGenericFuncProcessor(), getDefaultExprProcessor());

- HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
- egw.startWalking(startNodes, outputMap);
hasNonPartCols = pprCtx.getHasNonPartCols();

// Get the exprNodeDesc corresponding to the first start node;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java Fri Oct 5 04:13:06 2012
@@ -18,17 +18,13 @@

package org.apache.hadoop.hive.ql.optimizer.ppr;

-import java.util.Map;
-import java.util.Stack;
-
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

/**
@@ -37,50 +33,27 @@ import org.apache.hadoop.hive.ql.plan.Ex
* We then pass the filter to the partition pruner to construct a pruner for
* that table alias and store a mapping from the table scan operator to that
* pruner. We call that pruner later during plan generation.
+ *
+ *
+ * Refactor:
+ * Move main logic to PrunerOperatorFactory. OpProcFactory extends it to reuse logic.
+ *
+ * Any other pruner can reuse it by creating a class extending from PrunerOperatorFactory.
+ *
+ * Only specific logic is in generatePredicate(..) which is in its own class like OpProcFactory.
*/
-public final class OpProcFactory {
+public final class OpProcFactory extends PrunerOperatorFactory {

/**
* Determines the partition pruner for the filter. This is called only when
* the filter follows a table scan operator.
*/
- public static class FilterPPR implements NodeProcessor {
+ public static class FilterPPR extends FilterPruner {

@Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
+ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+ TableScanOperator top) throws SemanticException, UDFArgumentException {
OpWalkerCtx owc = (OpWalkerCtx) procCtx;
- FilterOperator fop = (FilterOperator) nd;
- FilterOperator fop2 = null;
-
- // The stack contains either ... TS, Filter or
- // ... TS, Filter, Filter with the head of the stack being the rightmost
- // symbol. So we just pop out the two elements from the top and if the
- // second one of them is not a table scan then the operator on the top of
- // the stack is the Table scan operator.
- Node tmp = stack.pop();
- Node tmp2 = stack.pop();
- TableScanOperator top = null;
- if (tmp2 instanceof TableScanOperator) {
- top = (TableScanOperator) tmp2;
- } else {
- top = (TableScanOperator) stack.peek();
- fop2 = (FilterOperator) tmp2;
- }
- stack.push(tmp2);
- stack.push(tmp);
-
- // If fop2 exists (i.e this is not the top level filter and fop2 is not
- // a sampling filter then we ignore the current filter
- if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
- return null;
- }
-
- // ignore the predicate in case it is not a sampling predicate
- if (fop.getConf().getIsSamplingPred()) {
- return null;
- }
-
// Otherwise this is not a sampling predicate and we need to
ExprNodeDesc predicate = fop.getConf().getPredicate();
String alias = top.getConf().getAlias();
@@ -93,50 +66,14 @@ public final class OpProcFactory {

// Add the pruning predicate to the table scan operator
addPruningPred(owc.getOpToPartPruner(), top, ppr_pred);
-
- return null;
- }
-
- private void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPPR,
- TableScanOperator top, ExprNodeDesc new_ppr_pred) throws UDFArgumentException {
- ExprNodeDesc old_ppr_pred = opToPPR.get(top);
- ExprNodeDesc ppr_pred = null;
- if (old_ppr_pred != null) {
- // or the old_ppr_pred and the new_ppr_pred
- ppr_pred = TypeCheckProcFactory.DefaultExprProcessor
- .getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred);
- } else {
- ppr_pred = new_ppr_pred;
- }
-
- // Put the mapping from table scan operator to ppr_pred
- opToPPR.put(top, ppr_pred);
-
- return;
}
- }
-
- /**
- * Default processor which just merges its children.
- */
- public static class DefaultPPR implements NodeProcessor {

- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- // Nothing needs to be done.
- return null;
- }
}

public static NodeProcessor getFilterProc() {
return new FilterPPR();
}

- public static NodeProcessor getDefaultProc() {
- return new DefaultPPR();
- }
-
private OpProcFactory() {
// prevent instantiation
}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Fri Oct 5 04:13:06 2012
@@ -37,22 +37,12 @@ import org.apache.hadoop.hive.ql.ErrorMs
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
import org.apache.hadoop.hive.ql.optimizer.Transform;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
@@ -76,7 +66,7 @@ public class PartitionPruner implements

// The log
private static final Log LOG = LogFactory
- .getLog("hive.ql.optimizer.ppr.PartitionPruner");
+ .getLog("hive.ql.optimizer.ppr.PartitionPruner");

/*
* (non-Javadoc)
@@ -91,25 +81,9 @@ public class PartitionPruner implements
// create a the context for walking operators
OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());

- Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- opRules.put(new RuleRegExp("R1",
- "(" + TableScanOperator.getOperatorName() + "%"
- + FilterOperator.getOperatorName() + "%)|("
- + TableScanOperator.getOperatorName() + "%"
- + FilterOperator.getOperatorName() + "%"
- + FilterOperator.getOperatorName() + "%)"),
- OpProcFactory.getFilterProc());
-
- // The dispatcher fires the processor corresponding to the closest matching
- // rule and passes the context along
- Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(),
- opRules, opWalkerCtx);
- GraphWalker ogw = new DefaultGraphWalker(disp);
-
- // Create a list of topop nodes
- ArrayList<Node> topNodes = new ArrayList<Node>();
- topNodes.addAll(pctx.getTopOps().values());
- ogw.startWalking(topNodes, null);
+ /* Move logic to PrunerUtils.walkOperatorTree() so that it can be reused. */
+ PrunerUtils.walkOperatorTree(pctx, opWalkerCtx, OpProcFactory.getFilterProc(),
+ OpProcFactory.getDefaultProc());
pctx.setHasNonPartCols(opWalkerCtx.getHasNonPartCols());

return pctx;

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedOct 5, '12 at 4:13a
activeOct 5, '12 at 4:13a
posts1
users1
websitehive.apache.org

1 user in discussion

Namit: 1 post

People

Translate

site design / logo © 2021 Grokbase