FAQ
Author: namit
Date: Fri Jan 18 15:53:30 2013
New Revision: 1435203

URL: http://svn.apache.org/viewvc?rev=1435203&view=rev
Log:
HIVE-3915 Union with map-only query on one side and two MR job query on the other
produces wrong results (Kevin Wilfong via namit)


Added:
     hive/trunk/ql/src/test/queries/clientpositive/union33.q
     hive/trunk/ql/src/test/results/clientpositive/union33.q.out
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1435203&r1=1435202&r2=1435203&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Fri Jan 18 15:53:30 2013
@@ -1108,6 +1108,9 @@ public class Driver implements CommandPr

        // Add root Tasks to runnable
        for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ // This should never happen, if it does, it's a bug with the potential to produce
+ // incorrect results.
+ assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
          driverCxt.addToRunnable(tsk);
        }


Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1435203&r1=1435202&r2=1435203&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Fri Jan 18 15:53:30 2013
@@ -177,7 +177,8 @@ public class GenMRUnion1 implements Node
      currTask.addDependentTask(uTask);
      if (ctx.getRootTasks().contains(uTask)) {
        ctx.getRootTasks().remove(uTask);
- if (!ctx.getRootTasks().contains(currTask)) {
+ if (!ctx.getRootTasks().contains(currTask) &&
+ shouldBeRootTask(currTask)) {
          ctx.getRootTasks().add(currTask);
        }
      }
@@ -290,7 +291,7 @@ public class GenMRUnion1 implements Node
      // If it a map-reduce job, create a temporary file
      else {
        // is the current task a root task
- if (shouldBeRootTask(currTask, parseCtx)
+ if (shouldBeRootTask(currTask)
            && (!ctx.getRootTasks().contains(currTask))) {
          ctx.getRootTasks().add(currTask);
        }
@@ -315,7 +316,7 @@ public class GenMRUnion1 implements Node
    }

    private boolean shouldBeRootTask(
- Task<? extends Serializable> currTask, ParseContext parseContext) {
+ Task<? extends Serializable> currTask) {
      return currTask.getParentTasks() == null
(currTask.getParentTasks().size() == 0);
    }

Added: hive/trunk/ql/src/test/queries/clientpositive/union33.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union33.q?rev=1435203&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union33.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/union33.q Fri Jan 18 15:53:30 2013
@@ -0,0 +1,47 @@
+set hive.groupby.skewindata=true;
+
+-- This tests that a union all with a map only subquery on one side and a
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING);
+
+EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a;
+
+INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a;
+
+SELECT COUNT(*) FROM test_src;
+
+EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a;
+
+INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a;
+
+SELECT COUNT(*) FROM test_src;
+
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/union33.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union33.q.out?rev=1435203&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union33.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/union33.q.out Fri Jan 18 15:53:30 2013
@@ -0,0 +1,561 @@
+PREHOOK: query: -- This tests that a union all with a map only subquery on one side and a
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that a union all with a map only subquery on one side and a
+-- subquery involving two map reduce jobs on the other runs correctly.
+
+CREATE TABLE test_src (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-9 is a root stage
+ Stage-10 depends on stages: Stage-9
+ Stage-2 depends on stages: Stage-10
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:a-subquery2:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-10
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+ null-subquery1:a-subquery1:src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key = 0.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src
+POSTHOOK: query: INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, value FROM src
+ WHERE key = 0
+UNION ALL
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+PREHOOK: query: SELECT COUNT(*) FROM test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+312
+PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT) value)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 0))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME test_src))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
+ Stage-6
+ Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-4 depends on stages: Stage-0
+ Stage-5
+ Stage-7
+ Stage-8 depends on stages: Stage-7
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery1:a-subquery1:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToString(_col1)
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+ null-subquery2:a-subquery2:src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key = 0.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ Union
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-9
+ Conditional Operator
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test_src
+
+ Stage: Stage-8
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src
+POSTHOOK: query: INSERT OVERWRITE TABLE test_src
+SELECT key, value FROM (
+ SELECT key, COUNT(*) AS value FROM src
+ GROUP BY key
+UNION ALL
+ SELECT key, value FROM src
+ WHERE key = 0
+)a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.null, (src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT COUNT(*) FROM test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: test_src.value EXPRESSION [(src)src.null, (src)src.FieldSchema(name:value, type:string, comment:default), ]
+312

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJan 18, '13 at 3:54p
activeJan 18, '13 at 3:54p
posts1
users1
websitehive.apache.org

1 user in discussion

Namit: 1 post

People

Translate

site design / logo © 2021 Grokbase