FAQ
Author: jvs
Date: Tue Jul 26 00:12:38 2011
New Revision: 1150962

URL: http://svn.apache.org/viewvc?rev=1150962&view=rev
Log:
HIVE-2128. Automatic Indexing with multiple tables.
(Syed Albiz via jvs)


Added:
hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
hive/trunk/ql/src/test/results/clientpositive/index_auto_self_join.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
hive/trunk/ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java Tue Jul 26 00:12:38 2011
@@ -82,7 +82,7 @@ public class HiveIndexResult {
BytesRefWritable[] bytesRef = new BytesRefWritable[2];
boolean ignoreHdfsLoc = false;

- public HiveIndexResult(String indexFile, JobConf conf) throws IOException,
+ public HiveIndexResult(List<String> indexFiles, JobConf conf) throws IOException,
HiveException {
job = conf;

@@ -90,18 +90,20 @@ public class HiveIndexResult {
bytesRef[1] = new BytesRefWritable();
ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC);

- if (indexFile != null) {
- Path indexFilePath = new Path(indexFile);
+ if (indexFiles != null && indexFiles.size() > 0) {
FileSystem fs = FileSystem.get(conf);
- FileStatus indexStat = fs.getFileStatus(indexFilePath);
List<Path> paths = new ArrayList<Path>();
- if (indexStat.isDir()) {
- FileStatus[] fss = fs.listStatus(indexFilePath);
- for (FileStatus f : fss) {
- paths.add(f.getPath());
+ for (String indexFile : indexFiles) {
+ Path indexFilePath = new Path(indexFile);
+ FileStatus indexStat = fs.getFileStatus(indexFilePath);
+ if (indexStat.isDir()) {
+ FileStatus[] fss = fs.listStatus(indexFilePath);
+ for (FileStatus f : fss) {
+ paths.add(f.getPath());
+ }
+ } else {
+ paths.add(indexFilePath);
}
- } else {
- paths.add(indexFilePath);
}

long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Tue Jul 26 00:12:38 2011
@@ -20,12 +20,18 @@ package org.apache.hadoop.hive.ql.index;

import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import java.util.Iterator;
import java.util.Set;
+import java.util.Map;
+import java.util.Arrays;
+import java.util.HashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -91,15 +97,27 @@ public class HiveIndexedInputFormat exte
return result.toArray(new HiveInputSplit[result.size()]);
}

+ public static List<String> getIndexFiles(String indexFileStr) {
+ // tokenize and store string of form (path,)+
+ if (indexFileStr == null) {
+ return null;
+ }
+ String[] chunks = indexFileStr.split(",");
+ return Arrays.asList(chunks);
+ }
+
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
String indexFileStr = job.get(indexFile);
l4j.info("index_file is " + indexFileStr);
+ List<String> indexFiles = getIndexFiles(indexFileStr);

HiveIndexResult hiveIndexResult = null;
- if (indexFileStr != null) {
+ if (indexFiles != null) {
+ boolean first = true;
+ StringBuilder newInputPaths = new StringBuilder();
try {
- hiveIndexResult = new HiveIndexResult(indexFileStr, job);
+ hiveIndexResult = new HiveIndexResult(indexFiles, job);
} catch (HiveException e) {
l4j.error("Unable to read index..");
throw new IOException(e);
@@ -107,8 +125,6 @@ public class HiveIndexedInputFormat exte

Set<String> inputFiles = hiveIndexResult.buckets.keySet();
Iterator<String> iter = inputFiles.iterator();
- boolean first = true;
- StringBuilder newInputPaths = new StringBuilder();
while(iter.hasNext()) {
String path = iter.next();
if (path.trim().equalsIgnoreCase("")) {
@@ -121,7 +137,6 @@ public class HiveIndexedInputFormat exte
}
newInputPaths.append(path);
}
-
FileInputFormat.setInputPaths(job, newInputPaths.toString());
} else {
return super.getSplits(job, numSplits);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java Tue Jul 26 00:12:38 2011
@@ -81,14 +81,6 @@ public class BitmapIndexHandler extends
return; // abort if we couldn't pull out anything from the predicate
}

- // Build reentrant QL for index query
- StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
-
- String tmpFile = pctx.getContext().getMRTmpFileURI();
- qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
- qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
- qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
-
List<BitmapInnerQuery> iqs = new ArrayList<BitmapInnerQuery>(indexes.size());
int i = 0;
for (Index index : indexes) {
@@ -100,6 +92,17 @@ public class BitmapIndexHandler extends
"ind" + i++));
}
}
+ // setup TableScanOperator to change input format for original query
+ queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
+
+ // Build reentrant QL for index query
+ StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
+
+ String tmpFile = pctx.getContext().getMRTmpFileURI();
+ qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
+ qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
+ qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
+

BitmapQuery head = iqs.get(0);
for ( i = 1; i < iqs.size(); i++) {
@@ -113,10 +116,7 @@ public class BitmapIndexHandler extends
Driver driver = new Driver(pctx.getConf());
driver.compile(qlCommand.toString(), false);

- // setup TableScanOperator to change input format for original query
- queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName());
queryContext.setIndexIntermediateFile(tmpFile);
-
queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
queryContext.setQueryTasks(driver.getPlan().getRootTasks());
}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Jul 26 00:12:38 2011
@@ -161,11 +161,14 @@ public class CompactIndexHandler extends

// pass residual predicate back out for further processing
queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
+ // setup TableScanOperator to change input format for original query
+ queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());

// Build reentrant QL for index query
StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");

String tmpFile = pctx.getContext().getMRTmpFileURI();
+ queryContext.setIndexIntermediateFile(tmpFile);
qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
qlCommand.append("SELECT `_bucketname` , `_offsets` FROM ");
qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
@@ -179,9 +182,6 @@ public class CompactIndexHandler extends
Driver driver = new Driver(pctx.getConf());
driver.compile(qlCommand.toString(), false);

- // setup TableScanOperator to change input format for original query
- queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
- queryContext.setIndexIntermediateFile(tmpFile);

queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
queryContext.setQueryTasks(driver.getPlan().getRootTasks());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java Tue Jul 26 00:12:38 2011
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.parse.P
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;

@@ -83,11 +84,15 @@ public class IndexWhereProcessor impleme
TableScanOperator operator = (TableScanOperator) nd;
List<Node> opChildren = operator.getChildren();
TableScanDesc operatorDesc = operator.getConf();
+ if (operatorDesc == null) {
+ return null;
+ }
ExprNodeDesc predicate = operatorDesc.getFilterExpr();

IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx;
ParseContext pctx = context.getParseContext();
LOG.info("Processing predicate for index optimization");
+
if (predicate == null) {
LOG.info("null predicate pushed down");
return null;
@@ -114,47 +119,42 @@ public class IndexWhereProcessor impleme

// get potential reentrant index queries from each index
Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
- Collection<List<Index>> tableIndexes = indexes.values();
- for (List<Index> indexesOnTable : tableIndexes) {
- List<List<Index>> indexesByType = new ArrayList<List<Index>>();
- for (Index index : indexesOnTable) {
- boolean added = false;
- for (List<Index> indexType : indexesByType) {
- if (indexType.isEmpty()) {
- indexType.add(index);
- added = true;
- } else if (indexType.get(0).getIndexHandlerClass().equals(
- index.getIndexHandlerClass())) {
- indexType.add(index);
- added = true;
- break;
- }
- }
- if (!added) {
- List<Index> newType = new ArrayList<Index>();
- newType.add(index);
- indexesByType.add(newType);
- }
+ // make sure we have an index on the table being scanned
+ TableDesc tblDesc = operator.getTableDesc();
+ Table srcTable = pctx.getTopToTable().get(operator);
+ if (indexes == null || indexes.get(srcTable) == null) {
+ return null;
+ }
+
+ List<Index> tableIndexes = indexes.get(srcTable);
+ Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
+ for (Index indexOnTable : tableIndexes) {
+ if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
+ List<Index> newType = new ArrayList<Index>();
+ newType.add(indexOnTable);
+ indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
+ } else {
+ indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
}
+ }

- // choose index type with most indexes of the same type on the table
- // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
- List<Index> bestIndexes = indexesByType.get(0);
- for (List<Index> indexTypes : indexesByType) {
- if (bestIndexes.size() < indexTypes.size()) {
- bestIndexes = indexTypes;
- }
+ // choose index type with most indexes of the same type on the table
+ // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
+ List<Index> bestIndexes = indexesByType.values().iterator().next();
+ for (List<Index> indexTypes : indexesByType.values()) {
+ if (bestIndexes.size() < indexTypes.size()) {
+ bestIndexes = indexTypes;
}
+ }

- // rewrite index queries for the chosen index type
- HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
- queryContext.setQueryPartitions(queryPartitions);
- rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext);
- List<Task<?>> indexTasks = queryContext.getQueryTasks();
+ // rewrite index queries for the chosen index type
+ HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
+ tmpQueryContext.setQueryPartitions(queryPartitions);
+ rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
+ List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();

- if (indexTasks != null && indexTasks.size() > 0) {
- queryContexts.put(bestIndexes.get(0), queryContext);
- }
+ if (indexTasks != null && indexTasks.size() > 0) {
+ queryContexts.put(bestIndexes.get(0), tmpQueryContext);
}
// choose an index rewrite to use
if (queryContexts.size() > 0) {
@@ -168,8 +168,7 @@ public class IndexWhereProcessor impleme
// prepare the map reduce job to use indexing
MapredWork work = currentTask.getWork();
work.setInputformat(queryContext.getIndexInputFormat());
- work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());
-
+ work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
// modify inputs based on index query
Set<ReadEntity> inputs = pctx.getSemanticInputs();
inputs.addAll(queryContext.getAdditionalSemanticInputs());
@@ -226,8 +225,6 @@ public class IndexWhereProcessor impleme
return;
}

-
-
/**
* Check the partitions used by the table scan to make sure they also exist in the
* index table
@@ -239,6 +236,7 @@ public class IndexWhereProcessor impleme
throws HiveException {
Hive hive = Hive.get(pctx.getConf());

+
// make sure each partition exists on the index table
PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
Set<Partition> queryPartitions = queryPartitionList.getConfirmedPartns();
@@ -259,6 +257,9 @@ public class IndexWhereProcessor impleme
private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException {
List<Table> indexTables = new ArrayList<Table>();
Table partitionedTable = part.getTable();
+ if (indexes == null || indexes.get(partitionedTable) == null) {
+ return indexTables;
+ }
for (Index index : indexes.get(partitionedTable)) {
indexTables.add(hive.getTable(index.getIndexTableName()));
}
@@ -276,6 +277,10 @@ public class IndexWhereProcessor impleme
return true; // empty specs come from non-partitioned tables
}

+ if (indexTables == null || indexTables.size() == 0) {
+ return false;
+ }
+
for (Table indexTable : indexTables) {
// get partitions that match the spec
List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java Tue Jul 26 00:12:38 2011
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.MapredWork;

/**
*
@@ -87,10 +88,14 @@ public class IndexWhereTaskDispatcher im
operatorRules,
indexWhereOptimizeCtx);

- // walk the mapper operator(not task) tree
+ // walk the mapper operator(not task) tree for each specific task
GraphWalker ogw = new DefaultGraphWalker(dispatcher);
ArrayList<Node> topNodes = new ArrayList<Node>();
- topNodes.addAll(pctx.getTopOps().values());
+ if (task.getWork() instanceof MapredWork) {
+ topNodes.addAll(((MapredWork)task.getWork()).getAliasToWork().values());
+ } else {
+ return null;
+ }
ogw.startWalking(topNodes, null);

return null;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1150962&r1=1150961&r2=1150962&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Tue Jul 26 00:12:38 2011
@@ -387,8 +387,12 @@ public class MapredWork implements Seria
return indexIntermediateFile;
}

- public void setIndexIntermediateFile(String fileName) {
- this.indexIntermediateFile = fileName;
+ public void addIndexIntermediateFile(String fileName) {
+ if (this.indexIntermediateFile == null) {
+ this.indexIntermediateFile = fileName;
+ } else {
+ this.indexIntermediateFile += "," + fileName;
+ }
}

public void setGatheringStats(boolean gatherStats) {

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,23 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_index ON srcpart REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;
+DROP INDEX srcpart_index on src;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,23 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+
+CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX srcpart_index ON srcpart REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;
+DROP INDEX srcpart_index on src;

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_self_join.q Tue Jul 26 00:12:38 2011
@@ -0,0 +1,18 @@
+-- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
+ALTER INDEX src_index ON src REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key;
+
+DROP INDEX src_index on src;

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out Tue Jul 26 00:12:38 2011
@@ -0,0 +1,638 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ b
+ TableScan
+ alias: b
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_232_8620953468013110737/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-49-01_720_2821092727071549075/-mr-10000
+82 val_82
+82 val_82
+82 val_82
+82 val_82
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+85 val_85
+85 val_85
+85 val_85
+85 val_85
+86 val_86
+86 val_86
+86 val_86
+86 val_86
+87 val_87
+87 val_87
+87 val_87
+87 val_87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-1 depends on stages: Stage-4, Stage-6
+ Stage-2 depends on stages: Stage-1
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ tmp_index:ind0:default__srcpart_srcpart_index__
+ TableScan
+ alias: default__srcpart_srcpart_index__
+ filterExpr:
+ expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offset
+ type: bigint
+ expr: _bitmaps
+ type: array<bigint>
+ outputColumnNames: _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: collect_set(_col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: array<bigint>
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: collect_set(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10003
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ filterExpr:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ b
+ TableScan
+ alias: b
+ filterExpr:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ tmp_index:ind0:default__src_src_index__
+ TableScan
+ alias: default__src_src_index__
+ filterExpr:
+ expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offset
+ type: bigint
+ expr: _bitmaps
+ type: array<bigint>
+ outputColumnNames: _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: collect_set(_col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: array<bigint>
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: collect_set(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-02_560_7042516595035703988/-mr-10004
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-03_417_3979280982191225396/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+82 val_82
+82 val_82
+82 val_82
+82 val_82
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+85 val_85
+85 val_85
+85 val_85
+85 val_85
+86 val_86
+86 val_86
+86 val_86
+86 val_86
+87 val_87
+87 val_87
+87 val_87
+87 val_87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP INDEX srcpart_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX srcpart_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out?rev=1150962&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out Tue Jul 26 00:12:38 2011
@@ -0,0 +1,564 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ b
+ TableScan
+ alias: b
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_433_5542638095321427981/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-50-36_581_3747634705901315299/-mr-10000
+82 val_82
+82 val_82
+82 val_82
+82 val_82
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+85 val_85
+85 val_85
+85 val_85
+85 val_85
+86 val_86
+86 val_86
+86 val_86
+86 val_86
+87 val_87
+87 val_87
+87 val_87
+87 val_87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-8 depends on stages: Stage-5 , consists of Stage-7, Stage-6
+ Stage-7
+ Stage-4 depends on stages: Stage-7, Stage-6
+ Stage-1 depends on stages: Stage-4, Stage-9
+ Stage-2 depends on stages: Stage-1
+ Stage-6
+ Stage-10 is a root stage
+ Stage-13 depends on stages: Stage-10 , consists of Stage-12, Stage-11
+ Stage-12
+ Stage-9 depends on stages: Stage-12, Stage-11
+ Stage-11
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ default__srcpart_srcpart_index__
+ TableScan
+ alias: default__srcpart_srcpart_index__
+ filterExpr:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offsets
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10000
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10003
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ filterExpr:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ b
+ TableScan
+ alias: b
+ filterExpr:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 70) and (key < 90))
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10002
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_494_7032523871014076058/-ext-10001
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-10
+ Map Reduce
+ Alias -> Map Operator Tree:
+ default__src_src_index__
+ TableScan
+ alias: default__src_src_index__
+ filterExpr:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 80) and (key < 100))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offsets
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-13
+ Conditional Operator
+
+ Stage: Stage-12
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10000
+
+ Stage: Stage-9
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-34_909_919679217594371878/-mr-10004
+
+ Stage: Stage-11
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-25_11-51-35_590_77690777312722941/-ext-10001
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-25_11-51-35_695_2966341580180350713/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+82 val_82
+82 val_82
+82 val_82
+82 val_82
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+84 val_84
+85 val_85
+85 val_85
+85 val_85
+85 val_85
+86 val_86
+86 val_86
+86 val_86
+86 val_86
+87 val_87
+87 val_87
+87 val_87
+87 val_87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP INDEX srcpart_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX srcpart_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]

Search Discussions

Discussion Posts

Previous

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 2 of 2 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJul 26, '11 at 12:13a
activeJul 26, '11 at 12:13a
posts2
users1
websitehive.apache.org

1 user in discussion

Jvs: 2 posts

People

Translate

site design / logo © 2022 Grokbase