Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets6.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets6.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets6.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,8 @@
+set hive.new.job.grouping.set.cardinality=2;
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
+-- This is not allowed with distincts.
+SELECT a, b, count(distinct c) from T1 group by a, b with cube;
+
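For reference, WITH CUBE over two keys is shorthand for the four grouping sets counted in the comment above. A minimal equivalent form of the failing query, written with Hive's explicit GROUPING SETS syntax (same table T1, same cardinality setting):

  SELECT a, b, count(distinct c)
  FROM T1
  GROUP BY a, b
  GROUPING SETS ((a, b), a, b, ());
  -- 2 grouping keys => 2^2 = 4 grouping sets; 4 > hive.new.job.grouping.set.cardinality (2),
  -- so this should fail with the same Error 10226 as the WITH CUBE form.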

Added: hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets7.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets7.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/groupby_grouping_sets7.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,10 @@
+set hive.new.job.grouping.set.cardinality=2;
+set hive.map.aggr=true;
+set hive.groupby.skewindata=true;
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
+-- This is not allowed with map-side aggregation and skew
+SELECT a, b, count(1) from T1 group by a, b with cube;
+
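Per the expected Error 10225 in the matching .q.out below, the remedy Hive suggests is to stop requesting separate skew handling. A minimal sketch of the passing variant, assuming the same table:

  set hive.new.job.grouping.set.cardinality=2;
  set hive.map.aggr=true;
  set hive.groupby.skewindata=false;  -- the remedy named in Error 10225

  SELECT a, b, count(1) from T1 group by a, b with cube;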

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,23 @@
+set hive.new.job.grouping.set.cardinality=2;
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1;
+
+-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube;
+SELECT a, b, count(*) from T1 group by a, b with cube;
+
+EXPLAIN
+SELECT a, b, sum(c) from T1 group by a, b with cube;
+SELECT a, b, sum(c) from T1 group by a, b with cube;
+
+CREATE TABLE T2(a STRING, b STRING, c int, d int);
+
+INSERT OVERWRITE TABLE T2
+SELECT a, b, c, c from T1;
+
+EXPLAIN
+SELECT a, b, sum(c+d) from T2 group by a, b with cube;
+SELECT a, b, sum(c+d) from T2 group by a, b with cube;
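Note that c in T1 is declared STRING, so sum(c) relies on an implicit cast to double (the matching .q.out below shows the aggregate's value type as double, with results printing as 23.0, 4.0, and so on), while sum(c+d) over the INT columns of T2 stays integral (bigint). A sketch with the cast made explicit, which should return the same results:

  SELECT a, b, sum(CAST(c AS DOUBLE)) from T1 group by a, b with cube;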

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,27 @@
+-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
+-- with a different number of rows for a and b in each file. Since BucketizedHiveInputFormat is used,
+-- this tests that the aggregate function stores the partial aggregate state correctly even if an
+-- additional MR job is created for processing the grouping sets.
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1;
+LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1;
+
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.new.job.grouping.set.cardinality = 30;
+
+-- The query below will execute in a single MR job, since 4 rows are generated per input row
+-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null)) and
+-- hive.new.job.grouping.set.cardinality is more than 4.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
+
+set hive.new.job.grouping.set.cardinality=2;
+
+-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
+-- The partial aggregation state should be maintained correctly across MR jobs.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
+
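The job count here follows from simple arithmetic: a cube over n grouping keys expands each input row into 2^n rows, one per grouping set. With n = 2, that is 2^2 = 4 rows per input row; 4 <= 30 keeps the first query in a single MR job, while 4 > 2 forces the second into two MR jobs, with the partial aggregation state carried between them.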

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,39 @@
+set hive.merge.mapfiles = false;
+set hive.merge.mapredfiles = false;
+-- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1;
+
+-- This tests that cubes and rollups work fine inside sub-queries.
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a;
+
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a;
+
+set hive.new.job.grouping.set.cardinality=2;
+
+-- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created
+-- for each of them
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a;
+
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a;
+

Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q Wed Jan 9 17:59:23 2013
@@ -0,0 +1,25 @@
+set hive.merge.mapfiles = false;
+set hive.merge.mapredfiles = false;
+-- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1;
+
+-- This tests that cubes and rollups work fine where the source is a sub-query
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+
+set hive.new.job.grouping.set.cardinality=2;
+
+-- Since 4 grouping sets would be generated for the cube, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
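Because the inner GROUP BY emits exactly one row per distinct (a, b) pair, the outer count(*) counts pairs rather than raw rows of T1. The totals in the matching .q.out below equal the earlier direct-cube counts only because each pair happens to occur once in grouping_sets.txt. A minimal sketch of an equivalent formulation:

  SELECT a, b, count(*) FROM
  (SELECT DISTINCT a, b FROM T1) subq1 group by a, b with cube;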

Added: hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets6.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets6.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets6.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10226]: An additional MR job is introduced since the cardinality of grouping sets is more than hive.new.job.grouping.set.cardinality. This functionality is not supported with distincts. Either set hive.new.job.grouping.set.cardinality to a high number (higher than the number of rows per input row due to grouping sets in the query), or rewrite the query to not use distincts. The number of rows per input row due to grouping sets is 4
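Following the first remedy in the message, raising the threshold above the grouping-set count keeps the query in the single-job plan and allows the distinct. A minimal sketch (32 is arbitrary; per the message, anything higher than 4 should do):

  set hive.new.job.grouping.set.cardinality=32;
  SELECT a, b, count(distinct c) from T1 group by a, b with cube;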

Added: hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets7.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets7.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/groupby_grouping_sets7.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10225]: An additional MR job is introduced since the number of rows created per input row due to grouping sets is more than hive.new.job.grouping.set.cardinality. There is no need to handle skew separately. set hive.groupby.skewindata to false. The number of rows per input row due to grouping sets is 4

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,520 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+NULL 1 2
+NULL 2 3
+NULL 3 1
+1 NULL 1
+1 1 1
+2 NULL 2
+2 2 1
+2 3 1
+3 NULL 1
+3 2 1
+5 NULL 1
+5 2 1
+8 NULL 1
+8 1 1
+PREHOOK: query: EXPLAIN
+SELECT a, b, sum(c) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, sum(c) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL c)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: c
+ type: string
+ outputColumnNames: a, b, c
+ Group By Operator
+ aggregations:
+ expr: sum(c)
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: double
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: double
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: double
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 23.0
+NULL 1 4.0
+NULL 2 14.0
+NULL 3 5.0
+1 NULL 3.0
+1 1 3.0
+2 NULL 9.0
+2 2 4.0
+2 3 5.0
+3 NULL 8.0
+3 2 8.0
+5 NULL 2.0
+5 2 2.0
+8 NULL 1.0
+8 1 1.0
+PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T2
+PREHOOK: query: INSERT OVERWRITE TABLE T2
+SELECT a, b, c, c from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: INSERT OVERWRITE TABLE T2
+SELECT a, b, c, c from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+PREHOOK: query: EXPLAIN
+SELECT a, b, sum(c+d) from T2 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, sum(c+d) from T2 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION sum (+ (TOK_TABLE_OR_COL c) (TOK_TABLE_OR_COL d))))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t2
+ TableScan
+ alias: t2
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: c
+ type: int
+ expr: d
+ type: int
+ outputColumnNames: a, b, c, d
+ Group By Operator
+ aggregations:
+ expr: sum((c + d))
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ]
+NULL NULL 46
+NULL 1 8
+NULL 2 28
+NULL 3 10
+1 NULL 6
+1 1 6
+2 NULL 18
+2 2 8
+2 3 10
+3 NULL 16
+3 2 16
+5 NULL 4
+5 2 4
+8 NULL 2
+8 1 2

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,332 @@
+PREHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
+-- with a different number of rows for a and b in each file. Since BucketizedHiveInputFormat is used,
+-- this tests that the aggregate function stores the partial aggregate state correctly even if an
+-- additional MR job is created for processing the grouping sets.
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
+-- with a different number of rows for a and b in each file. Since BucketizedHiveInputFormat is used,
+-- this tests that the aggregate function stores the partial aggregate state correctly even if an
+-- additional MR job is created for processing the grouping sets.
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
+-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null)) and
+-- hive.new.job.grouping.set.cardinality is more than 4.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
+-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null)) and
+-- hive.new.job.grouping.set.cardinality is more than 4.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: c
+ type: string
+ outputColumnNames: a, b, c
+ Group By Operator
+ aggregations:
+ expr: avg(c)
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: struct<count:bigint,sum:double>
+ expr: _col4
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: avg(VALUE._col0)
+ expr: count(VALUE._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 3.8333333333333335 12
+NULL 1 2.0 5
+NULL 2 5.2 5
+NULL 3 5.0 2
+1 NULL 2.6666666666666665 3
+1 1 3.0 2
+1 2 2.0 1
+2 NULL 5.2 5
+2 2 5.333333333333333 3
+2 3 5.0 2
+3 NULL 8.0 1
+3 2 8.0 1
+5 NULL 2.0 1
+5 1 2.0 1
+8 NULL 1.0 2
+8 1 1.0 2
+PREHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
+-- The partial aggregation state should be maintained correctly across MR jobs.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
+-- The partial aggregation state should be maintained correctly across MR jobs.
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: c
+ type: string
+ outputColumnNames: a, b, c
+ Group By Operator
+ aggregations:
+ expr: avg(c)
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: struct<count:bigint,sum:double>
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: avg(VALUE._col0)
+ expr: count(VALUE._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: struct<count:bigint,sum:double>
+ expr: _col4
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: avg(VALUE._col0)
+ expr: count(VALUE._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 3.8333333333333335 12
+NULL 1 2.0 5
+NULL 2 5.2 5
+NULL 3 5.0 2
+1 NULL 2.6666666666666665 3
+1 1 3.0 2
+1 2 2.0 1
+2 NULL 5.2 5
+2 2 5.333333333333333 3
+2 3 5.0 2
+3 NULL 8.0 1
+3 2 8.0 1
+5 NULL 2.0 1
+5 1 2.0 1
+8 NULL 1.0 2
+8 1 1.0 2
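The struct<count:bigint,sum:double> value type in both plans above is avg's partial aggregation buffer; in the two-job plan, Stage-2 merges these partials instead of re-reading raw rows, which is exactly the state this test verifies. A hand-rolled sketch that makes the merge explicit (count(c) stands in for count(*) on the assumption that c is never NULL in the sample files):

  SELECT a, b, sum(s) / sum(n) AS avg_c, sum(n) AS cnt
  FROM (SELECT a, b, sum(c) AS s, count(c) AS n FROM T1 GROUP BY a, b) partials
  GROUP BY a, b WITH CUBE;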

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,673 @@
+PREHOOK: query: -- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- This tests that cubes and rollups work fine inside sub-queries.
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This tests that cubes and rollups work fine inside sub-queries.
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) a) (. (TOK_TABLE_OR_COL subq2) a)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (a < 3.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ 1 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq2:t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (a < 3.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 NULL 1 1 NULL 1
+1 NULL 1 1 1 1
+1 1 1 1 NULL 1
+1 1 1 1 1 1
+2 NULL 2 2 NULL 2
+2 NULL 2 2 2 1
+2 NULL 2 2 3 1
+2 2 1 2 NULL 2
+2 2 1 2 2 1
+2 2 1 2 3 1
+2 3 1 2 NULL 2
+2 3 1 2 2 1
+2 3 1 2 3 1
+PREHOOK: query: -- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created
+-- for each of them
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created
+-- for each of them
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (TOK_TABLE_OR_COL a) 3)) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) a) (. (TOK_TABLE_OR_COL subq2) a)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (a < 3.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ 1 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: bigint
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq2:t1
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate:
+ expr: (a < 3.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
+on subq1.a = subq2.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 NULL 1 1 NULL 1
+1 NULL 1 1 1 1
+1 1 1 1 NULL 1
+1 1 1 1 1 1
+2 NULL 2 2 NULL 2
+2 NULL 2 2 2 1
+2 NULL 2 2 3 1
+2 2 1 2 NULL 2
+2 2 1 2 2 1
+2 2 1 2 3 1
+2 3 1 2 NULL 2
+2 3 1 2 2 1
+2 3 1 2 3 1
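Note that no joined row above carries a NULL value of subq1.a: each cube emits (NULL, b) and (NULL, NULL) rows, but the inner join on subq1.a = subq2.a never matches NULL keys, so those rows drop out. A sketch that would retain them, assuming Hive's null-safe equality operator (<=>) is available in this build:

  SELECT * FROM
  (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
  join
  (SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
  on subq1.a <=> subq2.a;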

Added: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out?rev=1430979&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out Wed Jan 9 17:59:23 2013
@@ -0,0 +1,433 @@
+PREHOOK: query: -- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Set merging to false above to make the explain more readable
+
+CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: -- This tests that cubes and rollups work fine where the source is a sub-query
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This tests that cubes and rollups work fine where the source is a sub-query
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+NULL 1 2
+NULL 2 3
+NULL 3 1
+1 NULL 1
+1 1 1
+2 NULL 2
+2 2 1
+2 3 1
+3 NULL 1
+3 2 1
+5 NULL 1
+5 2 1
+8 NULL 1
+8 1 1
+PREHOOK: query: -- Since 4 grouping sets would be generated for the cube, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since 4 grouping sets would be generated for the cube, an additional MR job should be created
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ outputColumnNames: a, b
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: a
+ type: string
+ expr: b
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: '0'
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+NULL 1 2
+NULL 2 3
+NULL 3 1
+1 NULL 1
+1 1 1
+2 NULL 2
+2 2 1
+2 3 1
+3 NULL 1
+3 2 1
+5 NULL 1
+5 2 1
+8 NULL 1
+8 1 1

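For readers following the plans above: WITH CUBE over the two keys (a, b) expands to four grouping sets, which is why hive.new.job.grouping.set.cardinality=2 forces the additional MR job. Stage-1 first aggregates by (a, b), and the following stage expands the result into the grouping sets (note the extra '0' grouping-set ID key column in the later stages). A minimal illustrative sketch of the equivalence, using the T1 table from the tests above (the GROUPING SETS spelling is an illustration, not part of this commit):

    -- WITH CUBE on (a, b) enumerates all four grouping sets:
    SELECT a, b, count(*) FROM T1
    GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ());

    -- equivalent to:
    SELECT a, b, count(*) FROM T1 GROUP BY a, b WITH CUBE;

Because four grouping sets exceed the configured cardinality of 2, the cube is compiled into the two-job plan shown above rather than emitting one row per grouping set per input row in a single job.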
Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby1.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby1.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby1.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby1.q.xml Wed Jan 9 17:59:23 2013
@@ -657,7 +657,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList1" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -1498,6 +1498,9 @@
            </void>
           </object>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList1"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby2.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby2.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby2.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby2.q.xml Wed Jan 9 17:59:23 2013
@@ -734,7 +734,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList0" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -1739,6 +1739,9 @@
            </void>
           </object>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList0"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby3.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby3.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby3.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby3.q.xml Wed Jan 9 17:59:23 2013
@@ -923,7 +923,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList0" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -2058,6 +2058,9 @@
          <void property="keys">
           <object class="java.util.ArrayList"/>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList0"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby4.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby4.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby4.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby4.q.xml Wed Jan 9 17:59:23 2013
@@ -449,7 +449,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList1" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -1197,6 +1197,9 @@
            </void>
           </object>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList1"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby5.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby5.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby5.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby5.q.xml Wed Jan 9 17:59:23 2013
@@ -504,7 +504,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList1" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -1366,6 +1366,9 @@
            </void>
           </object>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList1"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

Modified: hive/trunk/ql/src/test/results/compiler/plan/groupby6.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/groupby6.q.xml?rev=1430979&r1=1430978&r2=1430979&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/groupby6.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/groupby6.q.xml Wed Jan 9 17:59:23 2013
@@ -449,7 +449,7 @@
                     </object>
                    </void>
                    <void property="listGroupingSets">
- <object class="java.util.ArrayList"/>
+ <object id="ArrayList1" class="java.util.ArrayList"/>
                    </void>
                    <void property="memoryThreshold">
                     <float>0.9</float>
@@ -1197,6 +1197,9 @@
            </void>
           </object>
          </void>
+ <void property="listGroupingSets">
+ <object idref="ArrayList1"/>
+ </void>
          <void property="memoryThreshold">
           <float>0.9</float>
          </void>

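The groupby1.q.xml through groupby6.q.xml changes above affect plan serialization only: those tests use plain GROUP BY with no grouping sets, so the map-side and reduce-side GroupByOperator descriptors now share a single empty listGroupingSets, written once with an id and referenced thereafter via idref. A plain aggregation is not split by the new cardinality setting; a minimal sketch, assuming the T1 table from the tests above:

    set hive.new.job.grouping.set.cardinality=2;

    -- No grouping sets here, so no additional MR job is introduced;
    -- only the serialized plan carries the (empty) listGroupingSets property.
    EXPLAIN
    SELECT a, count(*) FROM T1 GROUP BY a;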