FAQ
Author: kevinwilfong
Date: Wed Jan 30 21:52:58 2013
New Revision: 1440695

URL: http://svn.apache.org/viewvc?rev=1440695&view=rev
Log:
HIVE-3940. Track columns accessed in each table in a query. (Samuel Yuan via kevinwilfong)

Added:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java
     hive/trunk/ql/src/test/queries/clientpositive/column_access_stats.q
     hive/trunk/ql/src/test/results/clientpositive/column_access_stats.q.out
Modified:
     hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
     hive/trunk/conf/hive-default.xml.template
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1440695&r1=1440694&r2=1440695&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Jan 30 21:52:58 2013
@@ -566,6 +566,8 @@ public class HiveConf extends Configurat
      HIVE_STATS_RELIABLE("hive.stats.reliable", false),
      // Collect table access keys information for operators that can benefit from bucketing
      HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false),
+ // Collect column access information
+ HIVE_STATS_COLLECT_SCANCOLS("hive.stats.collect.scancols", false),
      // standard error allowed for ndv estimates. A lower value indicates higher accuracy and a
      // higher compute cost.
      HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1440695&r1=1440694&r2=1440695&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Wed Jan 30 21:52:58 2013
@@ -1138,6 +1138,14 @@
  </property>

  <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed.
+ </description>
+</property>
+
+<property>
    <name>hive.stats.ndv.error</name>
    <value>20.0</value>
    <description>Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost.A lower value for error indicates higher accuracy and a higher compute cost.

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java?rev=1440695&r1=1440694&r2=1440695&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java Wed Jan 30 21:52:58 2013
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.hooks.L
  import org.apache.hadoop.hive.ql.hooks.ReadEntity;
  import org.apache.hadoop.hive.ql.hooks.WriteEntity;
  import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
  import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
  import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  import org.apache.hadoop.hive.ql.plan.ReducerTimeStatsPerJob;
@@ -84,6 +85,7 @@ public class QueryPlan implements Serial
     */
    protected LineageInfo linfo;
    private TableAccessInfo tableAccessInfo;
+ private ColumnAccessInfo columnAccessInfo;

    private HashMap<String, String> idToTableNameMap;

@@ -113,6 +115,7 @@ public class QueryPlan implements Serial
      outputs = sem.getOutputs();
      linfo = sem.getLineageInfo();
      tableAccessInfo = sem.getTableAccessInfo();
+ columnAccessInfo = sem.getColumnAccessInfo();
      idToTableNameMap = new HashMap<String, String>(sem.getIdToTableNameMap());

      queryId = makeQueryId();
@@ -777,6 +780,25 @@ public class QueryPlan implements Serial
      this.tableAccessInfo = tableAccessInfo;
    }

+ /**
+ * Gets the column access information.
+ *
+ * @return ColumnAccessInfo associated with the query.
+ */
+ public ColumnAccessInfo getColumnAccessInfo() {
+ return columnAccessInfo;
+ }
+
+ /**
+ * Sets the column access information.
+ *
+ * @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after
+ * the optimization phase.
+ */
+ public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+ this.columnAccessInfo = columnAccessInfo;
+ }
+
    public QueryProperties getQueryProperties() {
      return queryProperties;
    }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1440695&r1=1440694&r2=1440695&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Wed Jan 30 21:52:58 2013
@@ -97,6 +97,7 @@ public abstract class BaseSemanticAnalyz
     */
    protected LineageInfo linfo;
    protected TableAccessInfo tableAccessInfo;
+ protected ColumnAccessInfo columnAccessInfo;

    protected static final String TEXTFILE_INPUT = TextInputFormat.class
        .getName();
@@ -830,6 +831,25 @@ public abstract class BaseSemanticAnalyz
      this.tableAccessInfo = tableAccessInfo;
    }

+ /**
+ * Gets the column access information.
+ *
+ * @return ColumnAccessInfo associated with the query.
+ */
+ public ColumnAccessInfo getColumnAccessInfo() {
+ return columnAccessInfo;
+ }
+
+ /**
+ * Sets the column access information.
+ *
+ * @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after
+ * the optimization phase.
+ */
+ public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
+ this.columnAccessInfo = columnAccessInfo;
+ }
+
    protected HashMap<String, String> extractPartitionSpecs(Tree partspec)
        throws SemanticException {
      HashMap<String, String> partSpec = new LinkedHashMap<String, String>();

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java?rev=1440695&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java Wed Jan 30 21:52:58 2013
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.metadata.Table;
+
+/**
+ * Walks the optimized operator tree and records, per table, which columns
+ * are actually read by each TableScanOperator. Intended to run after the
+ * optimization phase so that column pruning has already reduced
+ * getNeededColumnIDs() to the surviving columns.
+ */
+public class ColumnAccessAnalyzer {
+ private static final Log LOG = LogFactory.getLog(ColumnAccessAnalyzer.class.getName());
+ // Parse context supplying the TableScanOperator -> Table mapping.
+ private final ParseContext pGraphContext;
+
+ // NOTE(review): this constructor leaves pGraphContext null, so calling
+ // analyzeColumnAccess() on an instance built this way throws a
+ // NullPointerException. Confirm whether the no-arg form is actually needed.
+ public ColumnAccessAnalyzer() {
+ pGraphContext = null;
+ }
+
+ public ColumnAccessAnalyzer(ParseContext pactx) {
+ pGraphContext = pactx;
+ }
+
+ /**
+ * Collects the accessed columns for every table scan in the plan.
+ *
+ * @return a ColumnAccessInfo mapping each scanned table's name to the set
+ *         of column names read from it.
+ */
+ public ColumnAccessInfo analyzeColumnAccess() throws SemanticException {
+ ColumnAccessInfo columnAccessInfo = new ColumnAccessInfo();
+ Map<TableScanOperator, Table> topOps = pGraphContext.getTopToTable();
+ for (TableScanOperator op : topOps.keySet()) {
+ Table table = topOps.get(op);
+ String tableName = table.getTableName();
+ List<FieldSchema> tableCols = table.getAllCols();
+ // Needed-column IDs are indices into the table's full column list.
+ for (int i : op.getNeededColumnIDs()) {
+ columnAccessInfo.add(tableName, tableCols.get(i).getName());
+ }
+ }
+ return columnAccessInfo;
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java?rev=1440695&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java Wed Jan 30 21:52:58 2013
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Holds the column-access summary for a query: a table-name to
+ * set-of-column-names map, populated by ColumnAccessAnalyzer and carried
+ * on the QueryPlan for consumers such as post-execution hooks.
+ */
+public class ColumnAccessInfo {
+ /**
+ * Map of table name to names of accessed columns.
+ */
+ private final Map<String, Set<String>> tableToColumnAccessMap;
+
+ public ColumnAccessInfo() {
+ tableToColumnAccessMap = new HashMap<String, Set<String>>();
+ }
+
+ /**
+ * Records that the given column of the given table was accessed,
+ * creating the per-table set on first use. Duplicate adds are no-ops.
+ */
+ public void add(String table, String col) {
+ Set<String> tableColumns = tableToColumnAccessMap.get(table);
+ if (tableColumns == null) {
+ tableColumns = new HashSet<String>();
+ tableToColumnAccessMap.put(table, tableColumns);
+ }
+ tableColumns.add(col);
+ }
+
+ // NOTE(review): returns the internal mutable map directly, so callers can
+ // modify this object's state. Consider an unmodifiable view if that matters.
+ public Map<String, Set<String>> getTableToColumnAccessMap() {
+ return tableToColumnAccessMap;
+ }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1440695&r1=1440694&r2=1440695&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Jan 30 21:52:58 2013
@@ -8473,6 +8473,13 @@ public class SemanticAnalyzer extends Ba
      optm.initialize(conf);
      pCtx = optm.optimize();

+ // Generate column access stats if required - wait until column pruning takes place
+ // during optimization
+ if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == true) {
+ ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
+ setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess());
+ }
+
      // At this point we have the complete operator tree
      // from which we want to find the reduce operator
      genMapRedTasks(pCtx);

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java?rev=1440695&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/CheckColumnAccessHook.java Wed Jan 30 21:52:58 2013
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.mortbay.log.Log;
+
+/*
+ * This hook is used for verifying the column access information
+ * that is generated and maintained in the QueryPlan object by the
+ * ColumnAccessAnalyzer. All the hook does is print out the columns
+ * accessed from each table as recorded in the ColumnAccessInfo
+ * in the QueryPlan.
+ */
+public class CheckColumnAccessHook implements ExecuteWithHookContext {
+
+ public void run(HookContext hookContext) {
+ Log.info("Running CheckColumnAccessHook");
+ HiveConf conf = hookContext.getConf();
+ // Silently do nothing unless column-access collection was enabled.
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS) == false) {
+ return;
+ }
+
+ QueryPlan plan = hookContext.getQueryPlan();
+ if (plan == null) {
+ return;
+ }
+
+ // Column access info is only populated for queries that went through
+ // the optimizer (see SemanticAnalyzer); absent otherwise.
+ ColumnAccessInfo columnAccessInfo = hookContext.getQueryPlan().getColumnAccessInfo();
+ if (columnAccessInfo == null) {
+ return;
+ }
+
+ LogHelper console = SessionState.getConsole();
+ Map<String, Set<String>> tableToColumnAccessMap =
+ columnAccessInfo.getTableToColumnAccessMap();
+
+ // We need a new map to ensure output is always produced in the same order.
+ // This makes tests that use this hook deterministic.
+ // NOTE(review): HashMap iteration order is unspecified by the JDK; the
+ // determinism here relies on String hash codes being stable across runs.
+ // A TreeMap would guarantee the ordering this comment claims — confirm.
+ Map<String, String> outputOrderedMap = new HashMap<String, String>();
+
+ for (Map.Entry<String, Set<String>> tableAccess : tableToColumnAccessMap.entrySet()) {
+ StringBuilder perTableInfo = new StringBuilder();
+ perTableInfo.append("Table:").append(tableAccess.getKey()).append("\n");
+ // Sort columns to make output deterministic
+ String[] columns = new String[tableAccess.getValue().size()];
+ tableAccess.getValue().toArray(columns);
+ Arrays.sort(columns);
+ perTableInfo.append("Columns:").append(StringUtils.join(columns, ','))
+ .append("\n");
+ outputOrderedMap.put(tableAccess.getKey(), perTableInfo.toString());
+ }
+
+ // printError writes to stderr, which is captured into the test's .q.out.
+ for (String perOperatorInfo : outputOrderedMap.values()) {
+ console.printError(perOperatorInfo);
+ }
+ }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/column_access_stats.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/column_access_stats.q?rev=1440695&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/column_access_stats.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/column_access_stats.q Wed Jan 30 21:52:58 2013
@@ -0,0 +1,158 @@
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.CheckColumnAccessHook;
+SET hive.stats.collect.scancols=true;
+
+-- This test is used for testing the ColumnAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;
+
+-- Simple select queries
+SELECT key FROM T1 ORDER BY key;
+SELECT key, val FROM T1 ORDER BY key, val;
+SELECT 1 FROM T1;
+
+-- More complicated select queries
+EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key;
+SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key;
+EXPLAIN SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k;
+SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k;
+SELECT key + 1 as k FROM T1 ORDER BY k;
+SELECT key + val as k FROM T1 ORDER BY k;
+
+-- Work with union
+EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c;
+
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c;
+
+EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c;
+
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c;
+
+-- Work with insert overwrite
+FROM T1
+INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key
+INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key;
+
+-- Simple joins
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key, T1.val, T2.key, T2.val;
+
+EXPLAIN
+SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key;
+
+SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key;
+
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = T2.val
+ORDER BY T1.key, T1.val;
+
+-- Map join
+SELECT /*+ MAPJOIN(a) */ *
+FROM T1 a JOIN T2 b
+ON a.key = b.key
+ORDER BY a.key, a.val, b.key, b.val;
+
+-- More joins
+EXPLAIN
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val;
+
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val;
+
+EXPLAIN
+SELECT subq1.val
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val;
+
+SELECT subq1.val
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val;
+
+-- Join followed by join
+EXPLAIN
+SELECT *
+FROM
+(
+ SELECT subq1.key as key
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key;
+
+SELECT *
+FROM
+(
+ SELECT subq1.key as key
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key;

Added: hive/trunk/ql/src/test/results/clientpositive/column_access_stats.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/column_access_stats.q.out?rev=1440695&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/column_access_stats.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/column_access_stats.q.out Wed Jan 30 21:52:58 2013
@@ -0,0 +1,1077 @@
+PREHOOK: query: -- This test is used for testing the ColumnAccessAnalyzer
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: query: -- Simple select queries
+SELECT key FROM T1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+1
+2
+3
+7
+8
+8
+PREHOOK: query: SELECT key, val FROM T1 ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key,val
+
+1 11
+2 12
+3 13
+7 17
+8 18
+8 28
+PREHOOK: query: SELECT 1 FROM T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+PREHOOK: query: -- More complicated select queries
+EXPLAIN SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key FROM (SELECT key, val FROM T1) subq1 ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+1
+2
+3
+7
+8
+8
+PREHOOK: query: EXPLAIN SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 ORDER BY k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+1
+2
+3
+7
+8
+8
+PREHOOK: query: SELECT key + 1 as k FROM T1 ORDER BY k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+2.0
+3.0
+4.0
+8.0
+9.0
+9.0
+PREHOOK: query: SELECT key + val as k FROM T1 ORDER BY k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key,val
+
+12.0
+14.0
+16.0
+24.0
+26.0
+36.0
+PREHOOK: query: -- Work with union
+EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:subq1-subquery1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ null-subquery2:subq1-subquery2:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: val
+ type: string
+ outputColumnNames: _col0
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT val as c FROM T1
+) subq1 ORDER BY c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key,val
+
+1
+11
+12
+13
+17
+18
+2
+28
+3
+7
+8
+8
+PREHOOK: query: EXPLAIN
+SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) c))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL c)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:subq1-subquery1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ null-subquery2:subq1-subquery2:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM (
+SELECT key as c FROM T1
+ UNION ALL
+SELECT key as c FROM T1
+) subq1 ORDER BY c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+Table:t1
+Columns:key
+
+1
+1
+2
+2
+3
+3
+7
+7
+8
+8
+8
+8
+PREHOOK: query: -- Work with insert overwrite
+FROM T1
+INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key
+INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
+Table:t1
+Columns:key,val
+
+PREHOOK: query: -- Simple joins
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key, T1.val, T2.key, T2.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key,val
+
+Table:t1
+Columns:key,val
+
+1 11 1 1
+2 12 2 1
+3 13 3 1
+7 17 7 1
+8 18 8 2
+8 28 8 2
+PREHOOK: query: EXPLAIN
+SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL T1) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ t2
+ TableScan
+ alias: t2
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT T1.key
+FROM T1 JOIN T2
+ON T1.key = T2.key
+ORDER BY T1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key
+
+Table:t1
+Columns:key
+
+1
+2
+3
+7
+8
+8
+PREHOOK: query: SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = T2.val
+ORDER BY T1.key, T1.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key,val
+
+Table:t1
+Columns:key,val
+
+PREHOOK: query: -- Map join
+SELECT /*+ MAPJOIN(a) */ *
+FROM T1 a JOIN T2 b
+ON a.key = b.key
+ORDER BY a.key, a.val, b.key, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key,val
+
+Table:t1
+Columns:key,val
+
+1 11 1 1
+2 12 2 1
+3 13 3 1
+7 17 7 1
+8 18 8 2
+8 28 8 2
+PREHOOK: query: -- More joins
+EXPLAIN
+SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME T1)) (TOK_TABREF (TOK_TABNAME T2)) (and (AND (= (. (TOK_TABLE_OR_COL T1) key) (. (TOK_TABLE_OR_COL T2) key)) (= (. (TOK_TABLE_OR_COL T1) val) 3)) (= (. (TOK_TABLE_OR_COL T2) val) 3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T1) val)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (val = 3.0)
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ t2
+ TableScan
+ alias: t2
+ Filter Operator
+ predicate:
+ expr: (val = 3.0)
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT *
+FROM T1 JOIN T2
+ON T1.key = T2.key AND T1.val = 3 and T2.val = 3
+ORDER BY T1.key, T1.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key,val
+
+Table:t1
+Columns:key,val
+
+PREHOOK: query: EXPLAIN
+SELECT subq1.val
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 5)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) val) (. (TOK_TABLE_OR_COL subq2) val)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) val))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq1) val)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (key = 5.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: val
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ subq2:t2
+ TableScan
+ alias: t2
+ Filter Operator
+ predicate:
+ expr: (key = 6.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: val
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT subq1.val
+FROM
+(
+ SELECT val FROM T1 WHERE key = 5
+) subq1
+JOIN
+(
+ SELECT val FROM T2 WHERE key = 6
+) subq2
+ON subq1.val = subq2.val
+ORDER BY subq1.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+Table:t2
+Columns:key,val
+
+Table:t1
+Columns:key,val
+
+PREHOOK: query: -- Join followed by join
+EXPLAIN
+SELECT *
+FROM
+(
+ SELECT subq1.key as key
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key
+PREHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 'teststring' val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key) key)))) T4) (TOK_TABREF (TOK_TABNAME T3)) (= (. (TOK_TABLE_OR_COL T3) key) (. (TOK_TABLE_OR_COL T4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL T3) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABL
  E_OR_COL T4) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t4:subq1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ t4:subq2:t2
+ TableScan
+ alias: t2
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ t3
+ TableScan
+ alias: t3
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ expr: _col0
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT *
+FROM
+(
+ SELECT subq1.key as key
+ FROM
+ (
+ SELECT key, val FROM T1
+ ) subq1
+ JOIN
+ (
+ SELECT key, 'teststring' as val FROM T2
+ ) subq2
+ ON subq1.key = subq2.key
+) T4
+JOIN T3
+ON T3.key = T4.key
+ORDER BY T3.key, T4.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+Table:t3
+Columns:key,val
+
+Table:t2
+Columns:key
+
+Table:t1
+Columns:key
+
+1 1 11.0
+2 2 12.0
+3 3 13.0
+7 7 17.0
+8 8 46.0
+8 8 46.0

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJan 30, '13 at 9:53p
activeJan 30, '13 at 9:53p
posts1
users1
websitehive.apache.org

1 user in discussion

Kevinwilfong: 1 post

People

Translate

site design / logo © 2021 Grokbase