FAQ
Author: kevinwilfong
Date: Thu Dec 20 04:33:21 2012
New Revision: 1424292

URL: http://svn.apache.org/viewvc?rev=1424292&view=rev
Log:
HIVE-3728. make optimizing multi-group by configurable. (njain via kevinwilfong)

Added:
hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q
hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml.template
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Dec 20 04:33:21 2012
@@ -392,6 +392,8 @@ public class HiveConf extends Configurat
HIVEALIAS("hive.alias", ""),
HIVEMAPSIDEAGGREGATE("hive.map.aggr", true),
HIVEGROUPBYSKEW("hive.groupby.skewindata", false),
+ HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts",
+ true),
HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000),
HIVEJOINCACHESIZE("hive.join.cache.size", 25000),
HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100),

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Thu Dec 20 04:33:21 2012
@@ -366,6 +366,25 @@
</property>

<property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same distinct.
+ Consider a query like:
+
+ from src
+ insert overwrite table dest1 select col1, count(distinct colx) group by col1
+ insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+
+ With this parameter set to true, first we spray by the distinct value (colx), and then
+ perform the 2 groups bys. This makes sense if map-side aggregation is turned off. However,
+ with maps-side aggregation, it might be useful in some cases to treat the 2 inserts independently,
+ thereby performing the query above in 2MR jobs instead of 3 (due to spraying by distinct key first).
+ If this parameter is turned off, we dont consider the fact that the distinct key is the same across
+ different MR jobs.
+ </description>
+</property>
+
+<property>
<name>hive.groupby.mapaggr.checkinterval</name>
<value>100000</value>
<description>Number of rows after which size of the grouping keys/aggregation classes is performed</description>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1424292&r1=1424291&r2=1424292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Dec 20 04:33:21 2012
@@ -6707,7 +6707,20 @@ public class SemanticAnalyzer extends Ba
// currently. It doesnt matter whether he has asked to do
// map-side aggregation or not. Map side aggregation is turned off
List<ASTNode> commonDistinctExprs = getCommonDistinctExprs(qb, input);
- boolean optimizeMultiGroupBy = commonDistinctExprs != null;
+
+ // Consider a query like:
+ //
+ // from src
+ // insert overwrite table dest1 select col1, count(distinct colx) group by col1
+ // insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+ //
+ // With HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS set to true, first we spray by the distinct
+ // value (colx), and then perform the 2 groups bys. This makes sense if map-side aggregation is
+ // turned off. However, with maps-side aggregation, it might be useful in some cases to treat
+ // the 2 inserts independently, thereby performing the query above in 2MR jobs instead of 3
+ // (due to spraying by distinct key first).
+ boolean optimizeMultiGroupBy = commonDistinctExprs != null &&
+ conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS);

Operator curr = input;


Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q?rev=1424292&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_mutli_insert_common_distinct.q Thu Dec 20 04:33:21 2012
@@ -0,0 +1,32 @@
+set hive.map.aggr=true;
+
+create table dest1(key int, cnt int);
+create table dest2(key int, cnt int);
+
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+
+select * from dest1 where key < 10 order by key;
+select * from dest2 where key < 20 order by key limit 10;
+
+set hive.optimize.multigroupby.common.distincts=false;
+
+-- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key;
+
+select * from dest1 where key < 10 order by key;
+select * from dest2 where key < 20 order by key limit 10;

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out?rev=1424292&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_mutli_insert_common_distinct.q.out Thu Dec 20 04:33:21 2012
@@ -0,0 +1,532 @@
+PREHOOK: query: create table dest1(key int, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest1(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: create table dest2(key int, cnt int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest2(key int, cnt int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest2
+PREHOOK: query: explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: -1
+ value expressions:
+ expr: key
+ type: string
+ expr: (key + key)
+ type: double
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col0)
+ bucketGroup: false
+ keys:
+ expr: VALUE._col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col0)
+ bucketGroup: false
+ keys:
+ expr: VALUE._col1
+ type: double
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: double
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: double
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-6
+ Stats-Aggr Operator
+
+
+PREHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from dest1 where key < 10 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1 where key < 10 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1
+2 1
+4 1
+5 1
+8 1
+9 1
+PREHOOK: query: select * from dest2 where key < 20 order by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1
+4 1
+8 1
+10 1
+16 1
+18 1
+PREHOOK: query: -- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no need to spray by distinct key first
+explain
+from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT value)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT value)
+ bucketGroup: false
+ keys:
+ expr: (key + key)
+ type: double
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: double
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: double
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: double
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+
+PREHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: from src
+insert overwrite table dest1 select key, count(distinct value) group by key
+insert overwrite table dest2 select key+key, count(distinct value) group by key+key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from dest1 where key < 10 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest1 where key < 10 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1
+2 1
+4 1
+5 1
+8 1
+9 1
+PREHOOK: query: select * from dest2 where key < 20 order by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1
+4 1
+8 1
+10 1
+16 1
+18 1

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedDec 20, '12 at 4:33a
activeDec 20, '12 at 4:33a
posts1
users1
websitehive.apache.org

1 user in discussion

Kevinwilfong: 1 post

People

Translate

site design / logo © 2021 Grokbase