FAQ
Repository: hive
Updated Branches:
   refs/heads/branch-1 96c9ee64f -> 976938518


HIVE-12788: Setting hive.optimize.union.remove to TRUE will break UNION ALL with aggregate functions (Chaoyu Tang, reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97693851
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97693851
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97693851

Branch: refs/heads/branch-1
Commit: 976938518f24ec179c0284f9646cf4fb412aa8a2
Parents: 96c9ee6
Author: ctang <ctang.ma@gmail.com>
Authored: Wed Jan 13 11:08:48 2016 -0500
Committer: ctang <ctang.ma@gmail.com>
Committed: Wed Jan 13 11:08:48 2016 -0500

----------------------------------------------------------------------
  .../hive/ql/optimizer/StatsOptimizer.java | 48 +-
  .../queries/clientpositive/union_remove_26.q | 112 +++
  .../clientpositive/union_remove_26.q.out | 823 +++++++++++++++++++
  3 files changed, 975 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/97693851/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5e5169d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -115,7 +115,8 @@ public class StatsOptimizer implements Transform {
      opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS),
              new MetaDataProcessor(pctx));

- Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+ NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
      GraphWalker ogw = new DefaultGraphWalker(disp);

      ArrayList<Node> topNodes = new ArrayList<Node>();
@@ -124,6 +125,10 @@ public class StatsOptimizer implements Transform {
      return pctx;
    }

+ private static class StatsOptimizerProcContext implements NodeProcessorCtx {
+ boolean stopProcess = false;
+ }
+
    private static class MetaDataProcessor implements NodeProcessor {

      private final ParseContext pctx;
@@ -201,7 +206,21 @@ public class StatsOptimizer implements Transform {
        // 3. Connect to metastore and get the stats
        // 4. Compose rows and add it in FetchWork
        // 5. Delete GBY - RS - GBY - SEL from the pipeline.
+ StatsOptimizerProcContext soProcCtx = (StatsOptimizerProcContext) procCtx;
+
+ // If the optimization has already been stopped, e.g. because a branch did not qualify
+ // or lacked the stats data, we do not continue this process. For example,
+ // for the query select max(value) from src1 union all select max(value) from src2,
+ // once it has been union remove optimized, the operator tree will become
+ // TS[0]->SEL[1]->GBY[2]->RS[3]->GBY[4]->FS[17]
+ // TS[6]->SEL[7]->GBY[8]->RS[9]->GBY[10]->FS[18]
+ // If the TS[0] branch for src1 is not optimized because src1 does not have column stats,
+ // there is no need to continue processing the TS[6] branch.
+ if (soProcCtx.stopProcess) {
+ return null;
+ }

+ boolean isOptimized = false;
        try {
          TableScanOperator tsOp = (TableScanOperator) stack.get(0);
          if (tsOp.getNumParent() > 0) {
@@ -586,7 +605,6 @@ public class StatsOptimizer implements Transform {
            }
          }

-
          List<List<Object>> allRows = new ArrayList<List<Object>>();
          allRows.add(oneRow);

@@ -596,19 +614,33 @@ public class StatsOptimizer implements Transform {
            colNames.add(colInfo.getInternalName());
            ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
          }
- StandardStructObjectInspector sOI = ObjectInspectorFactory.
- getStandardStructObjectInspector(colNames, ois);
- FetchWork fWork = new FetchWork(allRows, sOI);
- FetchTask fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
- fWork.setLimit(allRows.size());
- pctx.setFetchTask(fTask);

+ FetchWork fWork = null;
+ FetchTask fTask = pctx.getFetchTask();
+ if (fTask != null) {
+ fWork = fTask.getWork();
+ fWork.getRowsComputedUsingStats().addAll(allRows);
+ } else {
+ StandardStructObjectInspector sOI = ObjectInspectorFactory.
+ getStandardStructObjectInspector(colNames, ois);
+ fWork = new FetchWork(allRows, sOI);
+ fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
+ pctx.setFetchTask(fTask);
+ }
+ fWork.setLimit(fWork.getRowsComputedUsingStats().size());
+ isOptimized = true;
          return null;
        } catch (Exception e) {
          // this is best effort optimization, bail out in error conditions and
          // try generate and execute slower plan
          Log.debug("Failed to optimize using metadata optimizer", e);
          return null;
+ } finally {
+ // If the stats optimization was not applied for any reason, stop further processing and clear any FetchTask set so far
+ if (!isOptimized) {
+ soProcCtx.stopProcess = true;
+ pctx.setFetchTask(null);
+ }
        }
      }


http://git-wip-us.apache.org/repos/asf/hive/blob/97693851/ql/src/test/queries/clientpositive/union_remove_26.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q
new file mode 100644
index 0000000..9bdb8aa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_26.q
@@ -0,0 +1,112 @@
+set hive.stats.autogather=true;
+set hive.mapred.supports.subdirectories=true;
+
+-- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile;
+create table inputSrcTbl2(key string, val int) stored as textfile;
+create table inputSrcTbl3(key string, val int) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1;
+load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2;
+load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3;
+
+create table inputTbl1(key string, val int) stored as textfile;
+create table inputTbl2(key string, val int) stored as textfile;
+create table inputTbl3(key string, val int) stored as textfile;
+
+insert into inputTbl1 select * from inputSrcTbl1;
+insert into inputTbl2 select * from inputSrcTbl2;
+insert into inputTbl3 select * from inputSrcTbl3;
+
+set hive.compute.query.using.stats=true;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+--- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns;
+analyze table inputTbl3 compute statistics for columns;
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns;
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key;
+
+select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=false;
+
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/97693851/ql/src/test/results/clientpositive/union_remove_26.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_26.q.out b/ql/src/test/results/clientpositive/union_remove_26.q.out
new file mode 100644
index 0000000..3e2cc18
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_remove_26.q.out
@@ -0,0 +1,823 @@
+PREHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl1
+POSTHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl1
+PREHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl2
+POSTHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl2
+PREHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl3
+POSTHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl3
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl1
+PREHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl2
+POSTHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl2
+PREHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl3
+POSTHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl3
+PREHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl2
+POSTHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl2
+PREHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl3
+POSTHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl3
+PREHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl1
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl1
+POSTHOOK: Output: default@inputtbl1
+POSTHOOK: Lineage: inputtbl1.key SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl1.val SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl2
+PREHOOK: Output: default@inputtbl2
+POSTHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl2
+POSTHOOK: Output: default@inputtbl2
+POSTHOOK: Lineage: inputtbl2.key SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl2.val SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl3
+PREHOOK: Output: default@inputtbl3
+POSTHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl3
+POSTHOOK: Output: default@inputtbl3
+POSTHOOK: Lineage: inputtbl3.key SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl3.val SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+PREHOOK: query: analyze table inputTbl3 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table inputTbl3 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col2), max(_col2)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col2), max(_col2)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col2), max(_col2)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+14
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3, Stage-4
+ Stage-3 is a root stage
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(_col1), max(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3

Search Discussions

Discussion Posts

Previous

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 3 of 3 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jan 13, '16 at 2:10p
active: Jan 13, '16 at 4:09p
posts: 3
users: 1
website: hive.apache.org

1 user in discussion

Ctang: 3 posts

People

Translate

site design / logo © 2021 Grokbase