FAQ
Author: cws
Date: Thu Oct 4 07:12:53 2012
New Revision: 1393935

URL: http://svn.apache.org/viewvc?rev=1393935&view=rev
Log:
Track tables and keys used in join and group by operators
(Sambavi Muthukrishnan via Carl Steinbach)

Summary:
The idea here is to walk the operator tree and identify join and group by operators
whose keys can be mapped back directly to tables and columns on those tables. This
information can be used to determine the characteristics of a workload and which
tables are accessed most often using the same keys for these operators that can
benefit from bucketing.

Test Plan: Added new test cases in TestCliDriver. Ran all existing unit tests and they passed.

Reviewers: njain, kevinwilfong, JIRA, cwsteinbach

Reviewed By: cwsteinbach

CC: cwsteinbach

Differential Revision: https://reviews.facebook.net/D5613

Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (with props)
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java (with props)
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java (with props)
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java (with props)
hive/trunk/ql/src/test/queries/clientpositive/table_access_keys_stats.q
hive/trunk/ql/src/test/results/clientpositive/table_access_keys_stats.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml.template
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Oct 4 07:12:53 2012
@@ -539,10 +539,12 @@ public class HiveConf extends Configurat
HIVE_STATS_RETRIES_WAIT("hive.stats.retries.wait",
3000), // # milliseconds to wait before the next retry
HIVE_STATS_COLLECT_RAWDATASIZE("hive.stats.collect.rawdatasize", true),
- // should the raw data size be collected when analayzing tables
+ // should the raw data size be collected when analyzing tables
CLIENT_STATS_COUNTERS("hive.client.stats.counters", ""),
//Subset of counters that should be of interest for hive.client.stats.publishers (when one wants to limit their publishing). Non-display names should be used".
HIVE_STATS_RELIABLE("hive.stats.reliable", false),
+ // Collect table access keys information for operators that can benefit from bucketing
+ HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false),

// Concurrency
HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Thu Oct 4 07:12:53 2012
@@ -1027,6 +1027,14 @@
</property>

<property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and maintained in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine if they should be bucketed.
+ </description>
+</property>
+
+<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java Thu Oct 4 07:12:53 2012
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.hooks.L
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReducerTimeStatsPerJob;
import org.apache.hadoop.hive.ql.plan.api.AdjacencyType;
@@ -82,6 +83,7 @@ public class QueryPlan implements Serial
* Lineage information for the query.
*/
protected LineageInfo linfo;
+ private TableAccessInfo tableAccessInfo;

private HashMap<String, String> idToTableNameMap;

@@ -110,6 +112,7 @@ public class QueryPlan implements Serial
inputs = sem.getInputs();
outputs = sem.getOutputs();
linfo = sem.getLineageInfo();
+ tableAccessInfo = sem.getTableAccessInfo();
idToTableNameMap = new HashMap<String, String>(sem.getIdToTableNameMap());

queryId = makeQueryId();
@@ -756,6 +759,24 @@ public class QueryPlan implements Serial
this.linfo = linfo;
}

+ /**
+ * Gets the table access information.
+ *
+ * @return TableAccessInfo associated with the query.
+ */
+ public TableAccessInfo getTableAccessInfo() {
+ return tableAccessInfo;
+ }
+
+ /**
+ * Sets the table access information.
+ *
+ * @param tableAccessInfo The TableAccessInfo structure that is set right before the optimization phase.
+ */
+ public void setTableAccessInfo(TableAccessInfo tableAccessInfo) {
+ this.tableAccessInfo = tableAccessInfo;
+ }
+
public QueryProperties getQueryProperties() {
return queryProperties;
}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Oct 4 07:12:53 2012
@@ -1404,6 +1404,11 @@ public abstract class Operator<T extends
return ret;
}

+ /*
+ * True only for operators which produce at most 1 output row per input
+ * row to it. This will allow the output column names to be directly
+ * translated to input column names.
+ */
public boolean columnNamesRowResolvedCanBeObtained() {
return false;
}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Thu Oct 4 07:12:53 2012
@@ -93,6 +93,7 @@ public abstract class BaseSemanticAnalyz
* Lineage information for the query.
*/
protected LineageInfo linfo;
+ protected TableAccessInfo tableAccessInfo;

protected static final String TEXTFILE_INPUT = TextInputFormat.class
.getName();
@@ -796,6 +797,24 @@ public abstract class BaseSemanticAnalyz
this.linfo = linfo;
}

+ /**
+ * Gets the table access information.
+ *
+ * @return TableAccessInfo associated with the query.
+ */
+ public TableAccessInfo getTableAccessInfo() {
+ return tableAccessInfo;
+ }
+
+ /**
+ * Sets the table access information.
+ *
+ * @param tableAccessInfo The TableAccessInfo structure that is set in the optimization phase.
+ */
+ public void setTableAccessInfo(TableAccessInfo tableAccessInfo) {
+ this.tableAccessInfo = tableAccessInfo;
+ }
+
protected HashMap<String, String> extractPartitionSpecs(Tree partspec)
throws SemanticException {
HashMap<String, String> partSpec = new LinkedHashMap<String, String>();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1393935&r1=1393934&r2=1393935&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Oct 4 07:12:53 2012
@@ -7666,6 +7666,12 @@ public class SemanticAnalyzer extends Ba
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToSkewedPruner);

+ // Generate table access stats if required
+ if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) {
+ TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
+ setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
+ }
+
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java Thu Oct 4 07:12:53 2012
@@ -0,0 +1,331 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+/**
+ * TableAccessAnalyzer walks the operator graph from joins and group bys
+ * to the table scan operator backing it. It checks whether the operators
+ * in the path are pass-through of the base table (no aggregations/joins),
+ * and if the keys are mapped by expressions that do not modify the bucket
+ * for the key. If all the keys for a join/group by are clean pass-through
+ * of the base table columns, we can consider this operator as a candidate
+ * for improvement through bucketing.
+ */
+public class TableAccessAnalyzer {
+ private static final Log LOG = LogFactory.getLog(TableAccessAnalyzer.class.getName());
+ private final ParseContext pGraphContext;
+
+ public TableAccessAnalyzer() {
+ pGraphContext = null;
+ }
+
+ public TableAccessAnalyzer(ParseContext pactx) {
+ pGraphContext = pactx;
+ }
+
+ public TableAccessInfo analyzeTableAccess() throws SemanticException {
+
+ // Set up the rules for the graph walker for group by and join operators
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%"),
+ new GroupByProcessor(pGraphContext));
+ opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"),
+ new JoinProcessor(pGraphContext));
+ opRules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"),
+ new JoinProcessor(pGraphContext));
+
+ TableAccessCtx tableAccessCtx = new TableAccessCtx();
+ Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, tableAccessCtx);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+
+ // Create a list of topop nodes and walk!
+ List<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pGraphContext.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+
+ return tableAccessCtx.getTableAccessInfo();
+ }
+
+ private NodeProcessor getDefaultProc() {
+ return new NodeProcessor() {
+ @Override
+ public Object process(Node nd, Stack<Node> stack,
+ NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+ return null;
+ }
+ };
+ }
+
+ /**
+ * Processor for GroupBy operator
+ */
+ public class GroupByProcessor implements NodeProcessor {
+ protected ParseContext pGraphContext;
+
+ public GroupByProcessor(ParseContext pGraphContext) {
+ this.pGraphContext = pGraphContext;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) {
+ GroupByOperator op = (GroupByOperator)nd;
+ TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
+
+ // Get the key column names, and check if the keys are all constants
+ // or columns (not expressions). If yes, proceed.
+ List<String> keyColNames =
+ TableAccessAnalyzer.getKeyColNames(op.getConf().getKeys());
+
+ if (keyColNames == null) {
+ // we are done, since there are no keys to check for
+ return null;
+ }
+
+ // Walk the operator tree to the TableScan and build the mapping
+ // along the way for the columns that the group by uses as keys
+ TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
+ op.getParentOperators().get(0), keyColNames);
+
+ if (tso == null) {
+ // Could not find an allowed path to a table scan operator,
+ // hence we are done
+ return null;
+ }
+
+ Map<String, List<String>> tableToKeysMap = new HashMap<String, List<String>>();
+ Table tbl = pGraphContext.getTopToTable().get(tso);
+ tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
+ tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap);
+
+ return null;
+ }
+ }
+
+ /**
+ * Processor for Join operator.
+ */
+ public class JoinProcessor implements NodeProcessor {
+ protected ParseContext pGraphContext;
+
+ public JoinProcessor(ParseContext pGraphContext) {
+ this.pGraphContext = pGraphContext;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) {
+ JoinOperator op = (JoinOperator)nd;
+ TableAccessCtx tableAccessCtx = (TableAccessCtx)procCtx;
+ Map<String, List<String>> tableToKeysMap = new HashMap<String, List<String>>();
+
+ List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
+
+ // Get the key column names for each side of the join,
+ // and check if the keys are all constants
+ // or columns (not expressions). If yes, proceed.
+ QBJoinTree joinTree = pGraphContext.getJoinContext().get(op);
+ assert(parentOps.size() == joinTree.getBaseSrc().length);
+ int pos = 0;
+ for (String src : joinTree.getBaseSrc()) {
+ if (src != null) {
+ assert(parentOps.get(pos) instanceof ReduceSinkOperator);
+ ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) parentOps.get(pos);
+
+ // Get the key column names, and check if the keys are all constants
+ // or columns (not expressions). If yes, proceed.
+ List<String> keyColNames =
+ TableAccessAnalyzer.getKeyColNames(reduceSinkOp.getConf().getKeyCols());
+
+ if (keyColNames == null) {
+ // we are done, since there are no keys to check for
+ return null;
+ }
+
+ // Walk the operator tree to the TableScan and build the mapping
+ // along the way for the columns that the join uses as keys
+ TableScanOperator tso = TableAccessAnalyzer.genRootTableScan(
+ reduceSinkOp.getParentOperators().get(0), keyColNames);
+
+ if (tso == null) {
+ // Could not find an allowed path to a table scan operator,
+ // hence we are done
+ return null;
+ }
+
+ Table tbl = pGraphContext.getTopToTable().get(tso);
+ tableToKeysMap.put(tbl.getCompleteName(), keyColNames);
+ } else {
+ return null;
+ }
+ pos++;
+ }
+
+ // We only get here if we could map all join keys to source table columns
+ tableAccessCtx.addOperatorTableAccess(op, tableToKeysMap);
+ return null;
+ }
+ }
+
+ /**
+ * This method traces up from the given operator to the root
+ * of the operator graph until a TableScanOperator is reached.
+ * Along the way, if any operators are present that do not
+ * provide a direct mapping from columns of the base table to
+ * the keys on the input operator, the trace-back is stopped at that
+ * point. If the trace back can be done successfully, the method
+ * returns the root TableScanOperator as well as the list of column
+ * names on that table that map to the keys used for the input
+ * operator (which is currently only a join or group by).
+ */
+ private static TableScanOperator genRootTableScan(
+ Operator<? extends OperatorDesc> op, List<String> keyNames) {
+
+ boolean complexTree = false;
+ Operator<? extends OperatorDesc> currOp = op;
+ List<String> currColNames = keyNames;
+ List<Operator<? extends OperatorDesc>> parentOps = null;
+
+ // Track as you walk up the tree if there is an operator
+ // along the way that changes the rows from the table through
+ // joins or aggregations. Only allowed operators are selects
+ // and filters.
+ while (!complexTree) {
+ parentOps = currOp.getParentOperators();
+ if (parentOps == null) {
+ break;
+ }
+
+ if (parentOps.size() > 1 ||
+ !(currOp.columnNamesRowResolvedCanBeObtained())) {
+ complexTree = true;
+ } else {
+ // Generate the map of the input->output column name for the keys
+ // we care about
+ if (!TableAccessAnalyzer.genColNameMap(currOp, currColNames)) {
+ complexTree = true;
+ }
+ currOp = parentOps.get(0);
+ }
+ }
+
+ return complexTree? null: (TableScanOperator) currOp;
+ }
+
+ /*
+ * This method takes in an input operator and a subset of its output
+ * column names, and generates the input column names for the operator
+ * corresponding to those outputs. If the mapping from the input column
+ * name to the output column name is not simple, the method returns
+ * false, else it returns true. The list of output column names is
+ * modified by this method to be the list of corresponding input column
+ * names.
+ */
+ private static boolean genColNameMap(
+ Operator<? extends OperatorDesc> op, List<String> currColNames) {
+
+ List<ExprNodeDesc> colList = null;
+ List<String> outputColNames = null;
+
+ assert(op.columnNamesRowResolvedCanBeObtained());
+ // Only select operators among the allowed operators can cause changes in the
+ // column names
+ if (op instanceof SelectOperator) {
+ SelectDesc selectDesc = ((SelectOperator)op).getConf();
+ if (!selectDesc.isSelStarNoCompute()) {
+ colList = selectDesc.getColList();
+ outputColNames = selectDesc.getOutputColumnNames();
+
+ // Only columns and constants can be selected
+ for (int pos = 0; pos < colList.size(); pos++) {
+ ExprNodeDesc colExpr = colList.get(pos);
+ String outputColName = outputColNames.get(pos);
+
+ // If it is not a column we need for the keys, move on
+ if (!currColNames.contains(outputColName)) {
+ continue;
+ }
+
+ if ((colExpr instanceof ExprNodeConstantDesc) ||
+ (colExpr instanceof ExprNodeNullDesc)) {
+ currColNames.remove(outputColName);
+ continue;
+ } else if (colExpr instanceof ExprNodeColumnDesc) {
+ String inputColName = ((ExprNodeColumnDesc) colExpr).getColumn();
+ if (!outputColName.equals(inputColName)) {
+ currColNames.set(currColNames.indexOf(outputColName), inputColName);
+ }
+ } else {
+ // the column map can not be generated
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+
+ private static List<String> getKeyColNames(List<ExprNodeDesc> keys) {
+ List<String> colList = new ArrayList<String>();
+ for (ExprNodeDesc expr: keys) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc)expr;
+ colList.add(colExpr.getColumn());
+ } else if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeNullDesc) {
+ continue;
+ } else {
+ return null;
+ }
+ }
+ return colList;
+ }
+}

Propchange: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java Thu Oct 4 07:12:53 2012
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * This class implements the context information that is passed to the
+ * table access analyzer while it walks the operator graph.
+public class TableAccessCtx implements NodeProcessorCtx {
+
+ /**
+ * Map of operator id to table and key names.
+ */
+ private final TableAccessInfo tableAccessInfo;
+
+ /**
+ * Constructor.
+ */
+ public TableAccessCtx() {
+ tableAccessInfo = new TableAccessInfo();
+ }
+
+ public TableAccessInfo getTableAccessInfo() {
+ return tableAccessInfo;
+ }
+
+ public void addOperatorTableAccess(Operator<? extends OperatorDesc> op,
+ Map<String, List<String>> tableToKeysMap) {
+ assert(tableToKeysMap != null);
+ assert(op != null);
+ tableAccessInfo.add(op, tableToKeysMap);
+ }
+
+}

Propchange: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessCtx.java
------------------------------------------------------------------------------
svn:eol-style = native

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java Thu Oct 4 07:12:53 2012
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+public class TableAccessInfo {
+ /**
+ * Map of operator id to table and key names.
+ */
+ private final Map<Operator<? extends OperatorDesc>,
+ Map<String, List<String>>> operatorToTableAccessMap;
+
+ public TableAccessInfo() {
+ operatorToTableAccessMap =
+ new HashMap<Operator<? extends OperatorDesc>, Map<String, List<String>>>();
+ }
+
+ public void add(Operator<? extends OperatorDesc> op,
+ Map<String, List<String>> tableToKeysMap) {
+ operatorToTableAccessMap.put(op, tableToKeysMap);
+ }
+
+ public Map<Operator<? extends OperatorDesc>,
+ Map<String, List<String>>> getOperatorToTableAccessMap() {
+ return operatorToTableAccessMap;
+ }
+}

Propchange: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TableAccessInfo.java
------------------------------------------------------------------------------
svn:eol-style = native

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java Thu Oct 4 07:12:53 2012
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/*
+ * This hook is used for verifying the table access key information
+ * that is generated and maintained in the QueryPlan object by the
+ * TableAccessAnalyzer. All the hook does is print out the table/keys
+ * per operator recorded in the TableAccessInfo in the QueryPlan.
+ */
+public class CheckTableAccessHook implements ExecuteWithHookContext {
+
+ public void run(HookContext hookContext) {
+ HiveConf conf = hookContext.getConf();
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == false) {
+ return;
+ }
+
+ QueryPlan plan = hookContext.getQueryPlan();
+ if (plan == null) {
+ return;
+ }
+
+ TableAccessInfo tableAccessInfo = hookContext.getQueryPlan().getTableAccessInfo();
+ if (tableAccessInfo == null ||
+ tableAccessInfo.getOperatorToTableAccessMap() == null ||
+ tableAccessInfo.getOperatorToTableAccessMap().isEmpty()) {
+ return;
+ }
+
+ LogHelper console = SessionState.getConsole();
+ Map<Operator<? extends OperatorDesc>, Map<String, List<String>>> operatorToTableAccessMap =
+ tableAccessInfo.getOperatorToTableAccessMap();
+
+ // NOTE(review): a HashMap does not guarantee iteration order; a TreeMap
+ // would be needed if deterministic test output is actually required.
+ Map<String, String> outputOrderedMap = new HashMap<String, String>();
+
+ for (Map.Entry<Operator<? extends OperatorDesc>, Map<String, List<String>>> tableAccess:
+ operatorToTableAccessMap.entrySet()) {
+ StringBuilder perOperatorInfo = new StringBuilder();
+ perOperatorInfo.append("Operator:").append(tableAccess.getKey().getOperatorId())
+ .append("\n");
+ for (Map.Entry<String, List<String>> entry: tableAccess.getValue().entrySet()) {
+ perOperatorInfo.append("Table:").append(entry.getKey()).append("\n");
+ perOperatorInfo.append("Keys:").append(StringUtils.join(entry.getValue(), ','))
+ .append("\n");
+ }
+ outputOrderedMap.put(tableAccess.getKey().getOperatorId(), perOperatorInfo.toString());
+ }
+
+ for (String perOperatorInfo: outputOrderedMap.values()) {
+ console.printError(perOperatorInfo);
+ }
+ }
+}

Propchange: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckTableAccessHook.java
------------------------------------------------------------------------------
svn:eol-style = native

Added: hive/trunk/ql/src/test/queries/clientpositive/table_access_keys_stats.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/table_access_keys_stats.q?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/table_access_keys_stats.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/table_access_keys_stats.q Thu Oct 4 07:12:53 2012
@@ -0,0 +1,216 @@
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.CheckTableAccessHook;
+SET hive.stats.collect.tablekeys=true;
+
+-- This test is used for testing the TableAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;
+
+-- Simple group-by queries
+SELECT key, count(1) FROM T1 GROUP BY key;
+SELECT key, val, count(1) FROM T1 GROUP BY key, val;
+
+-- With subqueries and column aliases
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key;
+SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k;
+
+-- With constants
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val;
+SELECT key, 1, val, 2, count(1) FROM T1 GROUP BY key, 1, val, 2;
+
+-- no mapping with functions
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1;
+
+SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key;
+
+-- group by followed by union
+SELECT * FROM (
+SELECT key, count(1) as c FROM T1 GROUP BY key
+ UNION ALL
+SELECT key, count(1) as c FROM T1 GROUP BY key
+) subq1;
+
+-- group by followed by a join
+SELECT * FROM
+(SELECT key, count(1) as c FROM T1 GROUP BY key) subq1
+JOIN
+(SELECT key, count(1) as c FROM T1 GROUP BY key) subq2
+ON subq1.key = subq2.key;
+
+SELECT * FROM
+(SELECT key, count(1) FROM T1 GROUP BY key) subq1
+JOIN
+(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2
+ON subq1.key = subq2.key;
+
+-- constants from sub-queries should work fine
+SELECT key, constant, val, count(1) from
+(SELECT key, 1 as constant, val from T1) subq1
+group by key, constant, val;
+
+-- multiple levels of constants from sub-queries should work fine
+SELECT key, constant3, val, count(1) FROM
+(
+ SELECT key, constant AS constant2, val, 2 AS constant3
+ FROM
+ (
+ SELECT key, 1 AS constant, val
+ FROM T1
+ ) subq
+) subq2
+GROUP BY key, constant3, val;
+
+-- work with insert overwrite
+FROM T1
+INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key, 1
+INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key;
+
+-- simple joins
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = t2.key;
+
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = T2.val;
+
+-- map join
+SELECT /*+ MAPJOIN(a) */ *
+FROM T1 a JOIN T2 b
+ON a.key = b.key;
+
+-- with constant in join condition
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3;
+
+-- subqueries
+SELECT *
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val;
+
+SELECT *
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+T2
+ON subq1.val = T2.val;
+
+-- with column aliases in subqueries
+SELECT *
+FROM
+(
+ SELECT val as v FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.v = subq2.val;
+
+-- with constants in subqueries
+SELECT *
+FROM
+(
+ SELECT key, val FROM T1
+) subq1
+JOIN
+(
+ SELECT key, 'teststring' as val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key = subq2.key;
+
+-- multiple levels of constants in subqueries
+SELECT *
+FROM
+(
+ SELECT key, val from
+ (
+ SELECT key, 'teststring' as val from T1
+ ) subq1
+) subq2
+JOIN
+(
+ SELECT key, val FROM T2
+) subq3
+ON subq3.val = subq2.val AND subq3.key = subq2.key;
+
+-- no mapping on functions
+SELECT *
+FROM
+(
+ SELECT key, val from T1
+) subq1
+JOIN
+(
+ SELECT key, val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key + 1 = subq2.key;
+
+-- join followed by group by
+SELECT subq1.val, COUNT(*)
+FROM
+(
+ SELECT key, val FROM T1
+) subq1
+JOIN
+(
+ SELECT key, 'teststring' as val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key = subq2.key
+GROUP BY subq1.val;
+
+-- join followed by union
+SELECT *
+FROM
+(
+ SELECT subq1.val, COUNT(*)
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.val = subq2.val AND subq1.key = subq2.key
+ GROUP BY subq1.val
+ UNION ALL
+ SELECT val, COUNT(*)
+ FROM T3
+ GROUP BY val
+) subq4;
+
+-- join followed by join
+SELECT *
+FROM
+(
+ SELECT subq1.val as val, COUNT(*)
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.val = subq2.val AND subq1.key = subq2.key
+ GROUP by subq1.val
+) T4
+JOIN T3
+ON T3.val = T4.val;

Added: hive/trunk/ql/src/test/results/clientpositive/table_access_keys_stats.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/table_access_keys_stats.q.out?rev=1393935&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/table_access_keys_stats.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/table_access_keys_stats.q.out Thu Oct 4 07:12:53 2012
@@ -0,0 +1,539 @@
+PREHOOK: query: -- This test is used for testing the TableAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: -- Simple group-by queries
+SELECT key, count(1) FROM T1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key,val
+
+1 11 1
+2 12 1
+3 13 1
+7 17 1
+8 18 1
+8 28 1
+PREHOOK: query: -- With subqueries and column aliases
+SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_3
+Table:default@t1
+Keys:key
+
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_3
+Table:default@t1
+Keys:key
+
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: -- With constants
+SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+1 1 1
+1 2 1
+1 3 1
+1 7 1
+1 8 2
+PREHOOK: query: SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key,val
+
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
+8 1 18 1
+8 1 28 1
+PREHOOK: query: SELECT key, 1, val, 2, count(1) FROM T1 GROUP BY key, 1, val, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key,val
+
+1 1 11 2 1
+2 1 12 2 1
+3 1 13 2 1
+7 1 17 2 1
+8 1 18 2 1
+8 1 28 2 1
+PREHOOK: query: -- no mapping with functions
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 2.0 1
+2 3.0 1
+3 4.0 1
+7 8.0 1
+8 9.0 2
+PREHOOK: query: SELECT key + key, sum(cnt) from
+(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
+group by key + key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+2.0 1
+4.0 1
+6.0 1
+14.0 1
+16.0 2
+PREHOOK: query: -- group by followed by union
+SELECT * FROM (
+SELECT key, count(1) as c FROM T1 GROUP BY key
+ UNION ALL
+SELECT key, count(1) as c FROM T1 GROUP BY key
+) subq1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+Operator:GBY_8
+Table:default@t1
+Keys:key
+
+1 1
+2 1
+3 1
+7 1
+8 2
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: -- group by followed by a join
+SELECT * FROM
+(SELECT key, count(1) as c FROM T1 GROUP BY key) subq1
+JOIN
+(SELECT key, count(1) as c FROM T1 GROUP BY key) subq2
+ON subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+Operator:GBY_8
+Table:default@t1
+Keys:key
+
+1 1 1 1
+2 1 2 1
+3 1 3 1
+7 1 7 1
+8 2 8 2
+PREHOOK: query: SELECT * FROM
+(SELECT key, count(1) FROM T1 GROUP BY key) subq1
+JOIN
+(SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2
+ON subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_2
+Table:default@t1
+Keys:key,val
+
+Operator:GBY_8
+Table:default@t1
+Keys:key
+
+1 1 1 11 1
+2 1 2 12 1
+3 1 3 13 1
+7 1 7 17 1
+8 2 8 18 1
+8 2 8 28 1
+PREHOOK: query: -- constants from sub-queries should work fine
+SELECT key, constant, val, count(1) from
+(SELECT key, 1 as constant, val from T1) subq1
+group by key, constant, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_3
+Table:default@t1
+Keys:key,val
+
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
+8 1 18 1
+8 1 28 1
+PREHOOK: query: -- multiple levels of constants from sub-queries should work fine
+SELECT key, constant3, val, count(1) FROM
+(
+ SELECT key, constant AS constant2, val, 2 AS constant3
+ FROM
+ (
+ SELECT key, 1 AS constant, val
+ FROM T1
+ ) subq
+) subq2
+GROUP BY key, constant3, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Operator:GBY_4
+Table:default@t1
+Keys:key,val
+
+1 2 11 1
+2 2 12 1
+3 2 13 1
+7 2 17 1
+8 2 18 1
+8 2 28 1
+PREHOOK: query: -- work with insert overwrite
+FROM T1
+INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key, 1
+INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
+Operator:GBY_2
+Table:default@t1
+Keys:key
+
+Operator:GBY_8
+Table:default@t1
+Keys:key
+
+PREHOOK: query: -- simple joins
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = t2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_4
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:key
+
+1 11 1 1
+2 12 2 1
+3 13 3 1
+7 17 7 1
+8 18 8 2
+8 28 8 2
+PREHOOK: query: SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = T2.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_4
+Table:default@t2
+Keys:key,val
+Table:default@t1
+Keys:key,val
+
+PREHOOK: query: -- map join
+SELECT /*+ MAPJOIN(a) */ *
+FROM T1 a JOIN T2 b
+ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_4
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:key
+
+1 11 1 1
+2 12 2 1
+3 13 3 1
+7 17 7 1
+8 18 8 2
+8 28 8 2
+PREHOOK: query: -- with constant in join condition
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_6
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:key
+
+PREHOOK: query: -- subqueries
+SELECT *
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_8
+Table:default@t2
+Keys:val
+Table:default@t1
+Keys:val
+
+PREHOOK: query: SELECT *
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+T2
+ON subq1.val = T2.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_6
+Table:default@t2
+Keys:val
+Table:default@t1
+Keys:val
+
+PREHOOK: query: -- with column aliases in subqueries
+SELECT *
+FROM
+(
+ SELECT val as v FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.v = subq2.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_8
+Table:default@t2
+Keys:val
+Table:default@t1
+Keys:val
+
+PREHOOK: query: -- with constants in subqueries
+SELECT *
+FROM
+(
+ SELECT key, val FROM T1
+) subq1
+JOIN
+(
+ SELECT key, 'teststring' as val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_6
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:val,key
+
+PREHOOK: query: -- multiple levels of constants in subqueries
+SELECT *
+FROM
+(
+ SELECT key, val from
+ (
+ SELECT key, 'teststring' as val from T1
+ ) subq1
+) subq2
+JOIN
+(
+ SELECT key, val FROM T2
+) subq3
+ON subq3.val = subq2.val AND subq3.key = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_7
+Table:default@t2
+Keys:val,key
+Table:default@t1
+Keys:key
+
+PREHOOK: query: -- no mapping on functions
+SELECT *
+FROM
+(
+ SELECT key, val from T1
+) subq1
+JOIN
+(
+ SELECT key, val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key + 1 = subq2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+PREHOOK: query: -- join followed by group by
+SELECT subq1.val, COUNT(*)
+FROM
+(
+ SELECT key, val FROM T1
+) subq1
+JOIN
+(
+ SELECT key, 'teststring' as val FROM T2
+) subq2
+ON subq1.val = subq2.val AND subq1.key = subq2.key
+GROUP BY subq1.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Operator:JOIN_6
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:val,key
+
+PREHOOK: query: -- join followed by union
+SELECT *
+FROM
+(
+ SELECT subq1.val, COUNT(*)
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.val = subq2.val AND subq1.key = subq2.key
+ GROUP BY subq1.val
+ UNION ALL
+ SELECT val, COUNT(*)
+ FROM T3
+ GROUP BY val
+) subq4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+Operator:GBY_14
+Table:default@t3
+Keys:val
+
+Operator:JOIN_6
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:val,key
+
+11.0 1
+12.0 1
+13.0 1
+17.0 1
+46.0 1
+PREHOOK: query: -- join followed by join
+SELECT *
+FROM
+(
+ SELECT subq1.val as val, COUNT(*)
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.val = subq2.val AND subq1.key = subq2.key
+ GROUP by subq1.val
+) T4
+JOIN T3
+ON T3.val = T4.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+Operator:JOIN_6
+Table:default@t2
+Keys:key
+Table:default@t1
+Keys:val,key
+

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Oct 4, '12 at 7:13a
active: Oct 4, '12 at 7:13a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Cws: 1 post

People

Translate

site design / logo © 2021 Grokbase