Repository: hive
Updated Branches:
   refs/heads/master bb7153f9b -> 037fb02a8


HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/037fb02a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/037fb02a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/037fb02a

Branch: refs/heads/master
Commit: 037fb02a8edc1266d0beb02eee1fb90737fc1ef7
Parents: bb7153f
Author: Gopal V <gopalv@apache.org>
Authored: Thu Aug 27 02:27:35 2015 +0200
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Thu Aug 27 02:27:35 2015 +0200

----------------------------------------------------------------------
  .../java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java | 4 ++--
  .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java | 8 ++++++++
  .../java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java | 9 +++++----
  3 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
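A note on the mechanics of the fix: ExprNodeDesc.hashCode() is derived from the expression's type alone (the "return typeInfo.hashCode();" context visible in the second hunk below), so a large plan drops thousands of same-typed expressions into a single HashMap bucket, and every lookup then walks that bucket calling a deep equals(). IdentityHashMap sidesteps both costs by hashing on System.identityHashCode and comparing keys with ==. A minimal, self-contained sketch of the degradation; FakeExprNode is a hypothetical stand-in, not Hive's class:

import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

// Hypothetical stand-in for ExprNodeDesc: the hash depends only on the
// type, so every distinct int-typed expression collides into one bucket.
final class FakeExprNode {
    private final String typeName;   // plays the role of typeInfo
    private final String expression; // the distinguishing payload

    FakeExprNode(String typeName, String expression) {
        this.typeName = typeName;
        this.expression = expression;
    }

    @Override
    public int hashCode() {
        return typeName.hashCode(); // constant per type: mass collisions
    }

    @Override
    public boolean equals(Object o) {
        // Deep comparison runs once per colliding entry, so a bucket
        // scan in HashMap costs O(bucket size) per get/put.
        return o instanceof FakeExprNode
            && ((FakeExprNode) o).typeName.equals(typeName)
            && ((FakeExprNode) o).expression.equals(expression);
    }
}

public class CollisionDemo {
    public static void main(String[] args) {
        Map<FakeExprNode, Integer> byValue = new HashMap<>();
        Map<FakeExprNode, Integer> byIdentity = new IdentityHashMap<>();
        for (int i = 0; i < 50_000; i++) {
            FakeExprNode n = new FakeExprNode("int", "_col" + i);
            byValue.put(n, i);    // all keys share one bucket chain
            byIdentity.put(n, i); // spread by System.identityHashCode
        }
        // Timing the loop above shows the blow-up the JIRA title
        // describes (modern JDKs soften it by treeifying big buckets,
        // but the repeated deep equals() calls remain).
        System.out.println(byValue.size() + " / " + byIdentity.size());
    }
}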


http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
index 366b74b..6a81170 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
@@ -19,7 +19,7 @@
  package org.apache.hadoop.hive.ql.parse;

  import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.IdentityHashMap;
  import java.util.List;
  import java.util.Map;

@@ -53,7 +53,7 @@ public class LeadLagInfo {
    public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
      addLeadLagExpr(llFuncExpr);
      mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
-        new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
+        new IdentityHashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
      List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
      if (funcList == null) {
        funcList = new ArrayList<ExprNodeGenericFuncDesc>();

http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
index 15267b9..328bd86 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
@@ -60,6 +60,14 @@ public abstract class ExprNodeDesc implements Serializable, Node {
      return typeInfo.hashCode();
    }

+  @Override
+  public final boolean equals(Object o) {
+    // prevent equals from being overridden in sub-classes
+    // always use ExprNodeDescEqualityWrapper
+    // if you need any other equality than Object.equals()
+    return (o == this);
+  }
+
    public TypeInfo getTypeInfo() {
      return typeInfo;
    }

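The comment added to equals() points callers that still need structural comparison at ExprNodeDescEqualityWrapper: the node itself keeps cheap reference equality, and value semantics become an explicit opt-in at the call site. A hedged sketch of that wrapper pattern; Node and isSame() are illustrative stand-ins, not Hive's exact API:

import java.util.HashMap;
import java.util.Map;

// Illustrative node; ExprNodeDesc's real structural check is a method
// like isSame(), separate from Object.equals().
final class Node {
    final String type, expr;
    Node(String type, String expr) { this.type = type; this.expr = expr; }
    boolean isSame(Node o) {
        return o != null && type.equals(o.type) && expr.equals(o.expr);
    }
}

// Opt-in structural equality: wrap only where value semantics are
// wanted, so bare Node keys keep reference equality everywhere else.
final class NodeEqualityWrapper {
    final Node node;
    NodeEqualityWrapper(Node node) { this.node = node; }
    @Override public int hashCode() {
        return node.type.hashCode() ^ node.expr.hashCode();
    }
    @Override public boolean equals(Object o) {
        return o instanceof NodeEqualityWrapper
            && node.isSame(((NodeEqualityWrapper) o).node);
    }
}

public class WrapperDemo {
    public static void main(String[] args) {
        Map<NodeEqualityWrapper, String> m = new HashMap<>();
        m.put(new NodeEqualityWrapper(new Node("int", "key + 1")), "pred");
        // A structurally equal but distinct instance still hits:
        System.out.println(
            m.get(new NodeEqualityWrapper(new Node("int", "key + 1"))));
    }
}

Only the call sites that wrap pay for structural hashing and comparison; everything keyed on the bare node keeps O(1) identity lookups.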
http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
index 33ad3e8..f23facf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.ppd;

  import java.util.ArrayList;
  import java.util.HashMap;
+import java.util.IdentityHashMap;
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
@@ -98,17 +99,17 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
    public ExprWalkerInfo() {
      pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
      nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
    }

    public ExprWalkerInfo(Operator<? extends OperatorDesc> op) {
      this.op = op;

      pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
-    exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
+    exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
      nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
-    newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
+    newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
    }

    /**

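The identity semantics also matter for correctness in the predicate-pushdown walker: exprInfoMap annotates individual node instances, and newToOldExprMap ties rewritten nodes back to the specific originals they were cloned from. Under value-based equality, two distinct occurrences of the same sub-expression would collapse into a single key. A small demonstration, with Col as a hypothetical node that still has value-based equals, as ExprNodeDesc did before this patch:

import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;

// Hypothetical value-equal node, like pre-patch ExprNodeDesc.
final class Col {
    final String name;
    Col(String name) { this.name = name; }
    @Override public int hashCode() { return name.hashCode(); }
    @Override public boolean equals(Object o) {
        return o instanceof Col && ((Col) o).name.equals(name);
    }
}

public class WalkerMapDemo {
    public static void main(String[] args) {
        Col first = new Col("key");  // one occurrence in the plan
        Col second = new Col("key"); // a distinct occurrence elsewhere

        Map<Col, String> value = new HashMap<>();
        value.put(first, "pushable");
        value.put(second, "not pushable"); // silently overwrites 'first'
        System.out.println(value.size());  // 1 -- occurrences conflated

        Map<Col, String> identity = new IdentityHashMap<>();
        identity.put(first, "pushable");
        identity.put(second, "not pushable");
        System.out.println(identity.size()); // 2 -- per-instance entries
    }
}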

  • Jcamacho at Aug 27, 2015 at 12:30 am
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 8aab8cf1d -> 23878debd


    HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/23878deb
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/23878deb
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/23878deb

    Branch: refs/heads/branch-1
    Commit: 23878debdba90885dc37057213f772820b7a49e8
    Parents: 8aab8cf
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 02:29:47 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 02:29:47 2015 +0200

    (The attached diff is identical to the master commit 037fb02a above: the same changes to LeadLagInfo.java, ExprNodeDesc.java, and ExprWalkerInfo.java, backported to branch-1.)
  • Jcamacho at Aug 27, 2015 at 7:51 am
    Repository: hive
    Updated Branches:
       refs/heads/master 037fb02a8 -> b247cac4f


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
    new file mode 100644
    index 0000000..55edd90
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
    @@ -0,0 +1,1647 @@
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + key
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + value
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + ds
    + TOK_WHERE
    + or
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-08'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 1
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-09'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + key
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + value
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: pcr_t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int), value (type: string), ds (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [pcr_t1]
    + /pcr_t1/ds=2000-04-09 [pcr_t1]
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2
    + columns.types int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1, t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-10
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-10
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t1/ds=2000-04-10 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + sort order: +++
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t2
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t2
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
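
    Note: the plan above is flagged as a cross product because the
    disjunctive predicate references columns from both t1 and t2, so no
    equi-join keys can be derived (the Join Operator's key lists are
    empty) and the rewritten struct-IN filter can only be applied after
    the shuffle join.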

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
    index f536767..17248e4 100644
    --- a/ql/src/test/results/clientpositive/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
    @@ -390,21 +390,21 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
    index 5aa0df8..fb08f10 100644
    --- a/ql/src/test/results/clientpositive/spark/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
    @@ -2534,16 +2534,16 @@ STAGE PLANS:
                        GatherStats: false
                        Filter Operator
                          isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: int), value (type: string), ds (type: string)
                            outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                              sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                              tag: -1
                              auto parallelism: false
                  Path -> Alias:
    @@ -2648,13 +2648,13 @@ STAGE PLANS:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
                        GlobalTableId: 0
      #### A masked pattern was here ####
                        NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    index a6e6e38..52a847a 100644
    --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    @@ -405,21 +405,21 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                            Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index e8a9786..9756b0c 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -2909,7 +2909,7 @@ Stage-0
            Select Operator [SEL_2]
               outputColumnNames:["_col0"]
               Filter Operator [FIL_4]
    - predicate:(c_int) IN (-6, 6) (type: boolean)
    + predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
                  TableScan [TS_0]
                     alias:cbo_t1


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
    index 9e47014..73bf12d 100644
    --- a/ql/src/test/results/clientpositive/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
    @@ -46,19 +46,20 @@ STAGE PLANS:
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized

        Stage: Stage-0
          Fetch Operator
  • Jcamacho at Aug 27, 2015 at 7:51 am
    HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

    Branch: refs/heads/master
    Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
    Parents: 037fb02
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 09:50:08 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 09:50:08 2015 +0200

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
      .../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
      .../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
      .../queries/clientpositive/flatten_and_or.q | 4 +-
      .../test/queries/clientpositive/pointlookup.q | 59 +
      .../test/queries/clientpositive/pointlookup2.q | 51 +
      .../alter_partition_coltype.q.out | 12 +-
      .../clientpositive/annotate_stats_filter.q.out | 8 +-
      .../results/clientpositive/flatten_and_or.q.out | 8 +-
      ql/src/test/results/clientpositive/pcr.q.out | 12 +-
      .../results/clientpositive/pointlookup.q.out | 198 +++
      .../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
      .../results/clientpositive/ppd_transform.q.out | 12 +-
      .../test/results/clientpositive/spark/pcr.q.out | 12 +-
      .../clientpositive/spark/ppd_transform.q.out | 12 +-
      .../clientpositive/spark/vectorized_case.q.out | 2 +-
      .../clientpositive/tez/explainuser_1.q.out | 2 +-
      .../clientpositive/tez/vectorized_case.q.out | 2 +-
      .../clientpositive/vectorized_case.q.out | 9 +-
      19 files changed, 2118 insertions(+), 55 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index 8706a2d..8a00079 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
              "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."),
          HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
               "Whether to transform OR clauses in Filter operators into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
    + "Minimum number of OR clauses needed to transform into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
    + "Extract partial expressions when optimizing point lookup IN clauses"),
          // Constant propagation optimizer
          HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
          HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),
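
    Taken together, the three knobs compose as follows:
    hive.optimize.point.lookup gates the rewrite entirely,
    hive.optimize.point.lookup.min sets how many OR branches a filter
    needs before it is collapsed into an IN clause (the default of 31
    leaves small disjunctions untouched), and
    hive.optimize.point.lookup.extract controls whether per-column IN
    pre-conditions are additionally pulled out of multi-column lookups.
    A minimal sketch of reading the new knobs through the HiveConf API
    follows; the class name is invented for illustration, and the
    commit's own consumption of these values is the Optimizer.java hunk
    below:

      import org.apache.hadoop.hive.conf.HiveConf;

      public class PointLookupKnobs {
        public static void main(String[] args) {
          HiveConf conf = new HiveConf();
          // defaults registered above: true / 31 / true
          boolean on = HiveConf.getBoolVar(conf,
              HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER);
          int min = HiveConf.getIntVar(conf,
              HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
          boolean extract = HiveConf.getBoolVar(conf,
              HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
          System.out.println(on + " / " + min + " / " + extract);
        }
      }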

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    index 14f362f..439f616 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    @@ -68,6 +68,18 @@ public class Optimizer {

          // Add the transformation that computes the lineage information.
          transformations.add(new Generator());
    +
    + // Try to transform OR predicates in Filter into simpler IN clauses first
    + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    + final int min = HiveConf.getIntVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
    + final boolean extract = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
    + final boolean testMode = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVE_IN_TEST);
    + transformations.add(new PointLookupOptimizer(min, extract, testMode));
    + }
    +
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PredicateTransitivePropagate());
            if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
    @@ -82,11 +94,6 @@ public class Optimizer {
              transformations.add(new ConstantPropagate());
          }

    - // Try to transform OR predicates in Filter into IN clauses.
    - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    - transformations.add(new PointLookupOptimizer());
    - }
    -
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PartitionPruner());
            transformations.add(new PartitionConditionRemover());
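
    Note: besides wiring in the new constructor arguments, this hunk
    moves the point-lookup rewrite from after constant propagation to
    before PredicateTransitivePropagate, so the downstream
    predicate-pushdown and partition-pruning passes now operate on the
    IN form instead of the original OR tree.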

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    index 6a8acec..d83636d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    @@ -18,10 +18,14 @@
      package org.apache.hadoop.hive.ql.optimizer;

      import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.Comparator;
      import java.util.HashMap;
    +import java.util.HashSet;
      import java.util.LinkedHashMap;
      import java.util.List;
      import java.util.Map;
    +import java.util.Set;
      import java.util.Stack;

      import org.apache.calcite.util.Pair;
    @@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
      import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
      import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
    +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

      import com.google.common.collect.ArrayListMultimap;
    +import com.google.common.collect.ImmutableSortedSet;
      import com.google.common.collect.ListMultimap;

      /**
    @@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
                GenericUDFIn.class.getAnnotation(Description.class).name();
        private static final String STRUCT_UDF =
                GenericUDFStruct.class.getAnnotation(Description.class).name();
    + private static final String AND_UDF =
    + GenericUDFOPAnd.class.getAnnotation(Description.class).name();
    +
    + // these are closure-bound for all the walkers in context
    + public final int minOrExpr;
    + public final boolean extract;
    + public final boolean testMode;
    +
    + /*
    + * Pass in configs and pre-create a parse context
    + */
    + public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
    + this.minOrExpr = min;
    + this.extract = extract;
    + this.testMode = testMode;
    + }
    +
    + // Hash Set iteration isn't ordered, but force string sorted order
    + // to get a consistent test run.
    + private Collection<ExprNodeDescEqualityWrapper> sortForTests(
    + Set<ExprNodeDescEqualityWrapper> valuesExpr) {
    + if (!testMode) {
    + // normal case - sorting is wasted for an IN()
    + return valuesExpr;
    + }
    + final Collection<ExprNodeDescEqualityWrapper> sortedValues;
    +
    + sortedValues = ImmutableSortedSet.copyOf(
    + new Comparator<ExprNodeDescEqualityWrapper>() {
    + @Override
    + public int compare(ExprNodeDescEqualityWrapper w1,
    + ExprNodeDescEqualityWrapper w2) {
    + // fail if you find nulls (this is a test-code section)
    + if (w1.equals(w2)) {
    + return 0;
    + }
    + return w1.getExprNodeDesc().getExprString()
    + .compareTo(w2.getExprNodeDesc().getExprString());
    + }
    + }, valuesExpr);

    + return sortedValues;
    + }

        @Override
        public ParseContext transform(ParseContext pctx) throws SemanticException {
    @@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Generated new predicate with IN clause: " + newPredicate);
              }
    - filterOp.getConf().setOrigPredicate(predicate);
    + if (!extract) {
    + filterOp.getConf().setOrigPredicate(predicate);
    + }
              filterOp.getConf().setPredicate(newPredicate);
            }

    @@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
              return null;
            }

    - // 2. It is an OR operator
    + // 2. It is an OR operator with enough children
            List<ExprNodeDesc> children = fd.getChildren();
    + if (children.size() < minOrExpr) {
    + return null;
    + }
            ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
                    ArrayListMultimap.create();
            boolean modeAnd = false;
    @@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
            newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                    FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);

    + if (extract && columns.size() > 1) {
    + final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
    +
    + // extract pre-conditions for the tuple expressions
    + // (a,b) IN ((1,2),(2,3)) ->
    + // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
    +
    + for (String keyString : columnConstantsMap.keySet()) {
    + final Set<ExprNodeDescEqualityWrapper> valuesExpr =
    + new HashSet<ExprNodeDescEqualityWrapper>(children.size());
    + final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
    + columnConstantsMap.get(keyString);
    + for (int i = 0; i < children.size(); i++) {
    + Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
    + .get(i);
    + valuesExpr
    + .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
    + }
    + ExprNodeColumnDesc lookupCol = partial.get(0).left;
    + // generate a partial IN clause, if the column is a partition column
    + if (lookupCol.getIsPartitionColOrVirtualCol()
    + || valuesExpr.size() < children.size()) {
    + // optimize only nDV reductions
    + final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
    + inExpr.add(lookupCol);
    + for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
    + inExpr.add(value.getExprNodeDesc());
    + }
    + subExpr.add(new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
    + }
    + }
    + // loop complete, inspect the sub expressions generated
    + if (subExpr.size() > 0) {
    + // add the newPredicate to the end & produce an AND clause
    + subExpr.add(newPredicate);
    + newPredicate = new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
    + }
    + // else, newPredicate is unmodified
    + }
    +
            return newPredicate;
          }
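
    To make the extract path concrete, here is a self-contained sketch
    of the rewrite it performs, reduced to plain strings (class and
    method names are invented; the real code operates on ExprNodeDesc
    trees and uses ExprNodeDescEqualityWrapper for value de-duplication,
    as above). A per-column IN pre-condition is emitted only when the
    column is a partition column or when its distinct-value count is
    lower than the tuple count, i.e. only when the partial clause is
    actually more selective:

      import java.util.*;

      public class ExtractSketch {
        static String rewrite(List<String> cols, Set<String> partCols,
            List<List<String>> tuples) {
          StringBuilder sb = new StringBuilder();
          for (int c = 0; c < cols.size(); c++) {
            // collect the distinct constants seen for column c
            Set<String> values = new LinkedHashSet<>();
            for (List<String> t : tuples) {
              values.add(t.get(c));
            }
            // same gate as the optimizer: partition column, or an nDV reduction
            if (partCols.contains(cols.get(c)) || values.size() < tuples.size()) {
              sb.append(cols.get(c)).append(" IN ").append(values).append(" and ");
            }
          }
          sb.append("struct(").append(String.join(",", cols))
            .append(") IN ").append(tuples);
          return sb.toString();
        }

        public static void main(String[] args) {
          // (ds, key) IN (('2000-04-08', 1), ('2000-04-09', 2)), ds partitioned:
          // only ds gets a partial clause; key has as many values as tuples
          System.out.println(rewrite(
              Arrays.asList("ds", "key"),
              Collections.singleton("ds"),
              Arrays.asList(Arrays.asList("'2000-04-08'", "1"),
                            Arrays.asList("'2000-04-09'", "2"))));
        }
      }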


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
    index 6d65225..6c6e0f9 100644
    --- a/ql/src/test/queries/clientpositive/flatten_and_or.q
    +++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
    @@ -1,3 +1,5 @@
    +set hive.optimize.point.lookup=false;
    +
      explain
      SELECT key
      FROM src
    @@ -14,4 +16,4 @@ WHERE
         AND value = '1') OR (key = '9'
         AND value = '1') OR (key = '10'
         AND value = '3'))
    -;
    \ No newline at end of file
    +;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
    new file mode 100644
    index 0000000..1aef2ef
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup.q
    @@ -0,0 +1,59 @@
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +
    +set hive.optimize.point.lookup.min=3;
    +set hive.optimize.point.lookup.extract=false;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
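
    The three EXPLAINs in this new test walk the knobs through their
    interesting settings: with the default minimum of 31 the eleven-way
    OR is left untouched, with min=3 and extract=false it collapses into
    a single struct-IN predicate, and with extract=true a partial
    (value) IN (...) pre-condition is added in front of the struct-IN,
    as the expected output in pointlookup.q.out below shows.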

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
    new file mode 100644
    index 0000000..31bebbb
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup2.q
    @@ -0,0 +1,51 @@
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    +
    +create table pcr_t1 (key int, value string) partitioned by (ds string);
    +insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
    +
    +create table pcr_t2 (ds string, key int, value string);
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
    +
    +set hive.optimize.point.lookup.min=2;
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds;
    +
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    index 06515da..9fc3c8d 100644
    --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    @@ -1134,15 +1134,11 @@ STAGE PLANS:
                alias: alterdynamic_part_table
                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
    - Filter Operator
    - isSamplingPred: false
    - predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
    + Select Operator
    + expressions: intcol (type: string)
    + outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - Select Operator
    - expressions: intcol (type: string)
    - outputColumnNames: _col0
    - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - ListSink
    + ListSink

      PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
      PREHOOK: type: QUERY

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    index 9e0e78a..054b573 100644
    --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    @@ -678,15 +678,15 @@ STAGE PLANS:
                  alias: loc_orc
                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
    - predicate: (state) IN ('OH', 'CA') (type: boolean)
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                      outputColumnNames: _col0, _col1, _col2, _col3
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    index 5f25daa..9c51ff3 100644
    --- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
    +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    @@ -44,15 +44,15 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string)
                      outputColumnNames: _col0
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
    index 4c9ea77..d7c40a3 100644
    --- a/ql/src/test/results/clientpositive/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/pcr.q.out
    @@ -2475,16 +2475,16 @@ STAGE PLANS:
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string), ds (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                        sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                        tag: -1
                        auto parallelism: false
            Path -> Alias:
    @@ -2588,13 +2588,13 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
      #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
    new file mode 100644
    index 0000000..7e19be4
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup.q.out
    @@ -0,0 +1,198 @@
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
  • Prasanthj at Aug 27, 2015 at 4:43 pm
    Repository: hive
    Updated Branches:
       refs/heads/master b247cac4f -> 9670a2b3c


    HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

    Branch: refs/heads/master
    Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
    Parents: b247cac
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 11:43:25 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 11:43:25 2015 -0500

    ----------------------------------------------------------------------
      .../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +++++++++++---------
      .../queries/clientnegative/load_orc_negative3.q | 6 ++++
      .../test/queries/clientpositive/load_orc_part.q | 4 +++
      .../clientnegative/load_orc_negative3.q.out | 25 +++++++++++++
      .../results/clientpositive/load_orc_part.q.out | 18 ++++++++++
      5 files changed, 75 insertions(+), 16 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    index 85fa9c9..9d2702f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    @@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          return new URI(fromScheme, fromAuthority, path, null, null);
        }

    - private void applyConstraints(URI fromURI, URI toURI, Tree ast,
    + private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
            boolean isLocal) throws SemanticException {

    + FileStatus[] srcs = null;
    +
          // local mode implies that scheme should be "file"
          // we can change this going forward
          if (isLocal && !fromURI.getScheme().equals("file")) {
    @@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          try {
    - FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
    + srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
            if (srcs == null || srcs.length == 0) {
              throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
                  "No files matching path " + fromURI));
    @@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
                + "\"hive.metastore.warehouse.dir\" do not conflict.";
            throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
          }
    +
    + return srcs;
        }

        @Override
    @@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          // make sure the arguments make sense
    - applyConstraints(fromURI, toURI, fromTree, isLocal);
    + FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);

          // for managed tables, make sure the file formats match
          if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
    - ensureFileFormatsMatch(ts, fromURI);
    + ensureFileFormatsMatch(ts, files);
          }
          inputs.add(toReadEntity(new Path(fromURI)));
          Task<? extends Serializable> rTask = null;
    @@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }
        }

    - private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
    + private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
          final Class<? extends InputFormat> destInputFormat;
          try {
            if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
    @@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
      // Other file formats should do a similar check to make sure file formats match
          // when doing LOAD DATA .. INTO TABLE
          if (OrcInputFormat.class.equals(destInputFormat)) {
    - Path inputFilePath = new Path(fromURI);
    - try {
    - FileSystem fs = FileSystem.get(fromURI, conf);
    - // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
    - OrcFile.createReader(fs, inputFilePath);
    - } catch (FileFormatException e) {
    - throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    - " table is stored as ORC but the file being loaded is not a valid ORC file."));
    - } catch (IOException e) {
    - throw new SemanticException("Unable to load data to destination table." +
    - " Error: " + e.getMessage());
    + for (FileStatus fileStatus : fileStatuses) {
    + try {
    + Path filePath = fileStatus.getPath();
    + FileSystem fs = FileSystem.get(filePath.toUri(), conf);
    + // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
    + OrcFile.createReader(fs, filePath);
    + } catch (FileFormatException e) {
    + throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    + " table is stored as ORC but the file being loaded is not a valid ORC file."));
    + } catch (IOException e) {
    + throw new SemanticException("Unable to load data to destination table." +
    + " Error: " + e.getMessage());
    + }
            }
          }
        }
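
The fix above boils down to: resolve the matched files once in applyConstraintsAndGetFiles(), then run the ORC sanity check per file instead of against the (possibly directory) URI. A minimal standalone sketch of that per-file loop, assuming only the Hadoop FileSystem and OrcFile.createReader APIs the diff itself uses; the class and method names here are illustrative, not part of the patch, and the FileFormatException import location is assumed to match the Hive source tree of this era:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.FileFormatException;
    import org.apache.hadoop.hive.ql.io.orc.OrcFile;

    public class OrcLoadValidation {
      // Illustrative helper: fail the LOAD if any matched file is not valid ORC.
      static void validateAllOrc(FileStatus[] srcs, Configuration conf)
          throws Exception {
        for (FileStatus src : srcs) {
          Path filePath = src.getPath();
          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
          try {
            // creating the reader performs the ORC format sanity checks
            OrcFile.createReader(fs, filePath);
          } catch (FileFormatException e) {
            throw new Exception(filePath + " is not a valid ORC file", e);
          }
        }
      }
    }

This is what makes the new load_orc_negative3.q case fail cleanly: a directory full of text files now trips the check on the first non-ORC file.
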

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    new file mode 100644
    index 0000000..9a4116e
    --- /dev/null
    +++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    @@ -0,0 +1,6 @@
    +create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data local inpath '../../data/files/kv1.txt' into table text_test;
    +
    +set hive.default.fileformat=ORC;
    +create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
    index 0927ea4..2ff884d 100644
    --- a/ql/src/test/queries/clientpositive/load_orc_part.q
    +++ b/ql/src/test/queries/clientpositive/load_orc_part.q
    @@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
      load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
      dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;

    +load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
    +
      alter table orc_test add partition(ds='11');
      alter table orc_test partition(ds='11') set fileformat textfile;
      load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    new file mode 100644
    index 0000000..77fb50e
    --- /dev/null
    +++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    @@ -0,0 +1,25 @@
    +PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orc_test
    +POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orc_test
    +FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
    index 34ca493..2e02c2e 100644
    --- a/ql/src/test/results/clientpositive/load_orc_part.q.out
    +++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
    @@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
      POSTHOOK: Output: default@orc_test@ds=10
      Found 2 items
      #### A masked pattern was here ####
    +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_staging
    +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_staging
    +#### A masked pattern was here ####
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_test@ds=10
    +#### A masked pattern was here ####
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_test@ds=10
    +Found 1 items
    +#### A masked pattern was here ####
      PREHOOK: query: alter table orc_test add partition(ds='11')
      PREHOOK: type: ALTERTABLE_ADDPARTS
      PREHOOK: Output: default@orc_test
  • Pxiong at Aug 27, 2015 at 6:27 pm
    Repository: hive
    Updated Branches:
       refs/heads/master fb152e450 -> 9e85bbf27


    HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e85bbf2
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e85bbf2
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e85bbf2

    Branch: refs/heads/master
    Commit: 9e85bbf2780510edda79c247248da57619530577
    Parents: fb152e4
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Thu Aug 27 11:26:25 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Thu Aug 27 11:26:46 2015 -0700

    ----------------------------------------------------------------------
      .../calcite/translator/HiveOpConverter.java | 31 ++++++++++++++++----
      1 file changed, 26 insertions(+), 5 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9e85bbf2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    index 4db9863..1931880 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    @@ -686,13 +686,34 @@ public class HiveOpConverter {
            int numReducers, Operation acidOperation, boolean strictMode,
            List<String> keepColNames) throws SemanticException {
          // 1. Generate RS operator
    - if (input.getSchema().getTableNames().size() != 1) {
    + // 1.1 Prune the tableNames, only count the tableNames that are not empty strings
    + // as empty string in table aliases is only allowed for virtual columns.
    + String tableAlias = null;
    + Set<String> tableNames = input.getSchema().getTableNames();
    + for (String tableName : tableNames) {
    + if (tableName != null) {
    + if (tableName.length() == 0) {
    + if (tableAlias == null) {
    + tableAlias = tableName;
    + }
    + } else {
    + if (tableAlias == null || tableAlias.length() == 0) {
    + tableAlias = tableName;
    + } else {
    + if (!tableName.equals(tableAlias)) {
    + throw new SemanticException(
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
    + }
    + }
    + }
    + }
    + }
    + if (tableAlias == null) {
            throw new SemanticException(
    - "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one SelectOp but there is "
    - + input.getSchema().getTableNames().size());
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
          }
    - ReduceSinkOperator rsOp = genReduceSink(input, input.getSchema().getTableNames().iterator()
    - .next(), keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
    + // 1.2 Now generate RS operator
    + ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);

          // 2. Generate backtrack Select operator
          Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames,
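
Restated outside the deeply nested diff, the alias-pruning rule is: skip nulls, let the empty alias (allowed only for virtual columns) serve purely as a fallback, take the first non-empty alias, and fail if a second, different non-empty alias shows up. A compact sketch of the same logic; pickTableAlias is an illustrative name, not in the patch, and IllegalStateException stands in for SemanticException:

    import java.util.Set;

    public class AliasPruner {
      // Illustrative restatement of the tableAlias pruning in the patch.
      static String pickTableAlias(Set<String> tableNames) {
        String tableAlias = null;
        for (String name : tableNames) {
          if (name == null) {
            continue;                       // ignore nulls outright
          }
          if (name.isEmpty()) {
            if (tableAlias == null) {
              tableAlias = name;            // empty alias: fallback only
            }
          } else if (tableAlias == null || tableAlias.isEmpty()) {
            tableAlias = name;              // first real alias wins
          } else if (!name.equals(tableAlias)) {
            throw new IllegalStateException(
                "expecting only one tableAlias but there is more than one");
          }
        }
        if (tableAlias == null) {
          throw new IllegalStateException(
              "expecting only one tableAlias but there is none");
        }
        return tableAlias;
      }
    }
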
  • Prasanthj at Aug 27, 2015 at 10:14 pm
    Repository: hive
    Updated Branches:
       refs/heads/master 9e85bbf27 -> 607b0e8a6


    HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/607b0e8a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/607b0e8a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/607b0e8a

    Branch: refs/heads/master
    Commit: 607b0e8a6b4da164606b87c4d012059276b3a994
    Parents: 9e85bbf
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 17:14:17 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 17:14:17 2015 -0500

    ----------------------------------------------------------------------
      data/conf/tez/hive-site.xml | 4 ++
      ql/src/main/resources/tez-container-log4j2.xml | 49 +++++++++++++++++++++
      2 files changed, 53 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/data/conf/tez/hive-site.xml
    ----------------------------------------------------------------------
    diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
    index e0238aa..2f9415a 100644
    --- a/data/conf/tez/hive-site.xml
    +++ b/data/conf/tez/hive-site.xml
    @@ -253,5 +253,9 @@
        </description>
      </property>

    +<property>
    + <name>hive.tez.java.opts</name>
    + <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
    +</property>

      </configuration>

    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/ql/src/main/resources/tez-container-log4j2.xml
    ----------------------------------------------------------------------
    diff --git a/ql/src/main/resources/tez-container-log4j2.xml b/ql/src/main/resources/tez-container-log4j2.xml
    new file mode 100644
    index 0000000..be949dc
    --- /dev/null
    +++ b/ql/src/main/resources/tez-container-log4j2.xml
    @@ -0,0 +1,49 @@
    +<?xml version="1.0" encoding="UTF-8"?>
    +<!--
    + Licensed to the Apache Software Foundation (ASF) under one or more
    + contributor license agreements. See the NOTICE file distributed with
    + this work for additional information regarding copyright ownership.
    + The ASF licenses this file to You under the Apache License, Version 2.0
    + (the "License"); you may not use this file except in compliance with
    + the License. You may obtain a copy of the License at
    +
    + http://www.apache.org/licenses/LICENSE-2.0
    +
    + Unless required by applicable law or agreed to in writing, software
    + distributed under the License is distributed on an "AS IS" BASIS,
    + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + See the License for the specific language governing permissions and
    + limitations under the License.
    +-->
    +
    +<Configuration status="trace" strict="true" name="TezContainerLog4j2"
    + packages="org.apache.hadoop.hive.ql.log">
    +
    + <Properties>
    + <Property name="tez.container.log.threshold">ALL</Property>
    + <Property name="tez.container.log.level">INFO</Property>
    + <Property name="tez.container.root.logger">CLA</Property>
    + <Property name="tez.container.log.dir">${sys:yarn.app.container.log.dir}</Property>
    + <Property name="tez.container.log.file">syslog</Property>
    + </Properties>
    +
    + <Appenders>
    + <RollingFile name="CLA" fileName="${sys:tez.container.log.dir}/${sys:tez.container.log.file}"
    + filePattern="${sys:tez.container.log.dir}/${sys:tez.container.log.file}.%d{yyyy-MM-dd}">
    + <PatternLayout pattern="%d{ISO8601} %p [%t] %c{2}: %m%n" />
    + <Policies>
    + <!-- rollover@midnight (interval = 1 means daily) -->
    + <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
    + </Policies>
    + <!-- 30-day backup -->
    + <!-- <DefaultRolloverStrategy max="30"/> -->
    + </RollingFile>
    + </Appenders>
    +
    + <Loggers>
    + <Root level="${sys:tez.container.log.threshold}">
    + <AppenderRef ref="${sys:tez.container.root.logger}" level="${sys:tez.container.log.level}"/>
    + </Root>
    + </Loggers>
    +
    +</Configuration>
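
The two pieces tie together through JVM system properties: the -D flags added to hive.tez.java.opts in hive-site.xml become plain system properties in the container JVM, and the ${sys:...} lookups in the XML above resolve against them, so -Dtez.container.root.logger=CLA selects the CLA RollingFile appender. A trivial illustration of the property side (not Hive code):

    public class SysPropDemo {
      public static void main(String[] args) {
        // equivalent to launching the JVM with -Dtez.container.root.logger=CLA
        System.setProperty("tez.container.root.logger", "CLA");
        // log4j2's ${sys:tez.container.root.logger} resolves to this value
        System.out.println(System.getProperty("tez.container.root.logger")); // CLA
      }
    }
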
  • Gopalv at Aug 28, 2015 at 9:44 am
    Repository: hive
    Updated Branches:
       refs/heads/master ce2581680 -> 2ef40ca66


    Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

    This reverts commit b6d1143aa7aaa20de035898f34df2d6b581895b6.


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ef40ca6
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ef40ca6
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ef40ca6

    Branch: refs/heads/master
    Commit: 2ef40ca66ab0b9fbcf9bca5e6b8c5d7bd6d580c6
    Parents: ce25816
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 02:43:44 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 02:43:44 2015 -0700

    ----------------------------------------------------------------------
      .../optimizer/DynamicPartitionPruningOptimization.java | 12 ------------
      1 file changed, 12 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/2ef40ca6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    index 5ebd28a..f475926 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    @@ -189,18 +189,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
            LOG.debug("TableScan: " + ts);
          }

    - if (ts == null) {
    - // could be a reduce sink
    - LOG.warn("Could not find the table scan for " + filter);
    - return null;
    - } else {
    - Table table = ts.getConf().getTableMetadata();
    - if (table != null && !table.isPartitioned()) {
    - // table is not partitioned, skip optimizer
    - return null;
    - }
    - }
    -
          // collect the dynamic pruning conditions
          removerContext.dynLists.clear();
          walkExprTree(desc.getPredicate(), removerContext);
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    Repository: hive
    Updated Branches:
       refs/heads/llap 387fed279 -> 025765382


    HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/037fb02a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/037fb02a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/037fb02a

    Branch: refs/heads/llap
    Commit: 037fb02a8edc1266d0beb02eee1fb90737fc1ef7
    Parents: bb7153f
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 02:27:35 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 02:27:35 2015 +0200

    ----------------------------------------------------------------------
      .../java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java | 4 ++--
      .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java | 8 ++++++++
      .../java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java | 9 +++++----
      3 files changed, 15 insertions(+), 6 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    index 366b74b..6a81170 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    @@ -19,7 +19,7 @@
      package org.apache.hadoop.hive.ql.parse;

      import java.util.ArrayList;
    -import java.util.HashMap;
    +import java.util.IdentityHashMap;
      import java.util.List;
      import java.util.Map;

    @@ -53,7 +53,7 @@ public class LeadLagInfo {
        public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
          addLeadLagExpr(llFuncExpr);
          mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
    - new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
    + new IdentityHashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
          List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
          if (funcList == null) {
            funcList = new ArrayList<ExprNodeGenericFuncDesc>();

    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    index 15267b9..328bd86 100755
    --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    @@ -60,6 +60,14 @@ public abstract class ExprNodeDesc implements Serializable, Node {
          return typeInfo.hashCode();
        }

    + @Override
    + public final boolean equals(Object o) {
    + // prevent equals from being overridden in sub-classes
    + // always use ExprNodeDescEqualityWrapper
    + // if you need any other equality than Object.equals()
    + return (o == this);
    + }
    +
        public TypeInfo getTypeInfo() {
          return typeInfo;
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    index 33ad3e8..f23facf 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.ppd;

      import java.util.ArrayList;
      import java.util.HashMap;
    +import java.util.IdentityHashMap;
      import java.util.List;
      import java.util.Map;
      import java.util.Map.Entry;
    @@ -98,17 +99,17 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
        public ExprWalkerInfo() {
          pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
          nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
    - exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
    - newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
    + exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
    + newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
        }

        public ExprWalkerInfo(Operator<? extends OperatorDesc> op) {
          this.op = op;

          pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
    - exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
    + exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
          nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
    - newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
    + newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
        }

        /**
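
For context on why this swap matters: ExprNodeDesc.hashCode() returns typeInfo.hashCode(), so expressions of the same type all collide into one HashMap bucket, and with equals() now pinned to reference identity every lookup has to walk that chain, which is the O(N) degradation in the subject line. IdentityHashMap hashes on System.identityHashCode() instead, spreading the keys out again. A toy demonstration with a stand-in class (not Hive code; on pre-Java-8 HashMaps the collision chain is a plain linked list):

    import java.util.HashMap;
    import java.util.IdentityHashMap;
    import java.util.Map;

    public class BucketCollisionDemo {
      // Stand-in for ExprNodeDesc: type-based hash, identity-based equals.
      static final class Node {
        @Override public int hashCode() { return 42; }   // ~ typeInfo.hashCode()
        @Override public boolean equals(Object o) { return o == this; }
      }

      public static void main(String[] args) {
        Map<Node, Integer> byHash = new HashMap<>();             // one bucket for all keys
        Map<Node, Integer> byIdentity = new IdentityHashMap<>(); // identity hash, spread out
        for (int i = 0; i < 100_000; i++) {
          Node n = new Node();
          byHash.put(n, i);     // each put walks the ever-growing collision chain
          byIdentity.put(n, i); // constant-time, and matches the == semantics of equals()
        }
      }
    }
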
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

    Branch: refs/heads/llap
    Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
    Parents: b247cac
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 11:43:25 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 11:43:25 2015 -0500

    ----------------------------------------------------------------------
      .../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +++++++++++---------
      .../queries/clientnegative/load_orc_negative3.q | 6 ++++
      .../test/queries/clientpositive/load_orc_part.q | 4 +++
      .../clientnegative/load_orc_negative3.q.out | 25 +++++++++++++
      .../results/clientpositive/load_orc_part.q.out | 18 ++++++++++
      5 files changed, 75 insertions(+), 16 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    index 85fa9c9..9d2702f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    @@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          return new URI(fromScheme, fromAuthority, path, null, null);
        }

    - private void applyConstraints(URI fromURI, URI toURI, Tree ast,
    + private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
            boolean isLocal) throws SemanticException {

    + FileStatus[] srcs = null;
    +
          // local mode implies that scheme should be "file"
          // we can change this going forward
          if (isLocal && !fromURI.getScheme().equals("file")) {
    @@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          try {
    - FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
    + srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
            if (srcs == null || srcs.length == 0) {
              throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
                  "No files matching path " + fromURI));
    @@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
                + "\"hive.metastore.warehouse.dir\" do not conflict.";
            throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
          }
    +
    + return srcs;
        }

        @Override
    @@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          // make sure the arguments make sense
    - applyConstraints(fromURI, toURI, fromTree, isLocal);
    + FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);

          // for managed tables, make sure the file formats match
          if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
    - ensureFileFormatsMatch(ts, fromURI);
    + ensureFileFormatsMatch(ts, files);
          }
          inputs.add(toReadEntity(new Path(fromURI)));
          Task<? extends Serializable> rTask = null;
    @@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }
        }

    - private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
    + private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
          final Class<? extends InputFormat> destInputFormat;
          try {
            if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
    @@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
      // Other file formats should do a similar check to make sure file formats match
          // when doing LOAD DATA .. INTO TABLE
          if (OrcInputFormat.class.equals(destInputFormat)) {
    - Path inputFilePath = new Path(fromURI);
    - try {
    - FileSystem fs = FileSystem.get(fromURI, conf);
    - // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
    - OrcFile.createReader(fs, inputFilePath);
    - } catch (FileFormatException e) {
    - throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    - " table is stored as ORC but the file being loaded is not a valid ORC file."));
    - } catch (IOException e) {
    - throw new SemanticException("Unable to load data to destination table." +
    - " Error: " + e.getMessage());
    + for (FileStatus fileStatus : fileStatuses) {
    + try {
    + Path filePath = fileStatus.getPath();
    + FileSystem fs = FileSystem.get(filePath.toUri(), conf);
    + // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
    + OrcFile.createReader(fs, filePath);
    + } catch (FileFormatException e) {
    + throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    + " table is stored as ORC but the file being loaded is not a valid ORC file."));
    + } catch (IOException e) {
    + throw new SemanticException("Unable to load data to destination table." +
    + " Error: " + e.getMessage());
    + }
            }
          }
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    new file mode 100644
    index 0000000..9a4116e
    --- /dev/null
    +++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    @@ -0,0 +1,6 @@
    +create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data local inpath '../../data/files/kv1.txt' into table text_test;
    +
    +set hive.default.fileformat=ORC;
    +create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
    index 0927ea4..2ff884d 100644
    --- a/ql/src/test/queries/clientpositive/load_orc_part.q
    +++ b/ql/src/test/queries/clientpositive/load_orc_part.q
    @@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
      load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
      dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;

    +load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
    +
      alter table orc_test add partition(ds='11');
      alter table orc_test partition(ds='11') set fileformat textfile;
      load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    new file mode 100644
    index 0000000..77fb50e
    --- /dev/null
    +++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    @@ -0,0 +1,25 @@
    +PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orc_test
    +POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orc_test
    +FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
    index 34ca493..2e02c2e 100644
    --- a/ql/src/test/results/clientpositive/load_orc_part.q.out
    +++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
    @@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
      POSTHOOK: Output: default@orc_test@ds=10
      Found 2 items
      #### A masked pattern was here ####
    +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_staging
    +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_staging
    +#### A masked pattern was here ####
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_test@ds=10
    +#### A masked pattern was here ####
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_test@ds=10
    +Found 1 items
    +#### A masked pattern was here ####
      PREHOOK: query: alter table orc_test add partition(ds='11')
      PREHOOK: type: ALTERTABLE_ADDPARTS
      PREHOOK: Output: default@orc_test
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

    Branch: refs/heads/llap
    Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
    Parents: 037fb02
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 09:50:08 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 09:50:08 2015 +0200

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
      .../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
      .../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
      .../queries/clientpositive/flatten_and_or.q | 4 +-
      .../test/queries/clientpositive/pointlookup.q | 59 +
      .../test/queries/clientpositive/pointlookup2.q | 51 +
      .../alter_partition_coltype.q.out | 12 +-
      .../clientpositive/annotate_stats_filter.q.out | 8 +-
      .../results/clientpositive/flatten_and_or.q.out | 8 +-
      ql/src/test/results/clientpositive/pcr.q.out | 12 +-
      .../results/clientpositive/pointlookup.q.out | 198 +++
      .../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
      .../results/clientpositive/ppd_transform.q.out | 12 +-
      .../test/results/clientpositive/spark/pcr.q.out | 12 +-
      .../clientpositive/spark/ppd_transform.q.out | 12 +-
      .../clientpositive/spark/vectorized_case.q.out | 2 +-
      .../clientpositive/tez/explainuser_1.q.out | 2 +-
      .../clientpositive/tez/vectorized_case.q.out | 2 +-
      .../clientpositive/vectorized_case.q.out | 9 +-
      19 files changed, 2118 insertions(+), 55 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index 8706a2d..8a00079 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
              "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."),
          HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
               "Whether to transform OR clauses in Filter operators into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
    + "Minimum number of OR clauses needed to transform into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
    + "Extract partial expressions when optimizing point lookup IN clauses"),
          // Constant propagation optimizer
          HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
          HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    index 14f362f..439f616 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    @@ -68,6 +68,18 @@ public class Optimizer {

          // Add the transformation that computes the lineage information.
          transformations.add(new Generator());
    +
    + // Try to transform OR predicates in Filter into simpler IN clauses first
    + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    + final int min = HiveConf.getIntVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
    + final boolean extract = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
    + final boolean testMode = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVE_IN_TEST);
    + transformations.add(new PointLookupOptimizer(min, extract, testMode));
    + }
    +
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PredicateTransitivePropagate());
            if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
    @@ -82,11 +94,6 @@ public class Optimizer {
              transformations.add(new ConstantPropagate());
          }

    - // Try to transform OR predicates in Filter into IN clauses.
    - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    - transformations.add(new PointLookupOptimizer());
    - }
    -
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PartitionPruner());
            transformations.add(new PartitionConditionRemover());

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    index 6a8acec..d83636d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    @@ -18,10 +18,14 @@
      package org.apache.hadoop.hive.ql.optimizer;

      import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.Comparator;
      import java.util.HashMap;
    +import java.util.HashSet;
      import java.util.LinkedHashMap;
      import java.util.List;
      import java.util.Map;
    +import java.util.Set;
      import java.util.Stack;

      import org.apache.calcite.util.Pair;
    @@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
      import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
      import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
    +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

      import com.google.common.collect.ArrayListMultimap;
    +import com.google.common.collect.ImmutableSortedSet;
      import com.google.common.collect.ListMultimap;

      /**
    @@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
                GenericUDFIn.class.getAnnotation(Description.class).name();
        private static final String STRUCT_UDF =
                GenericUDFStruct.class.getAnnotation(Description.class).name();
    + private static final String AND_UDF =
    + GenericUDFOPAnd.class.getAnnotation(Description.class).name();
    +
    + // these are closure-bound for all the walkers in context
    + public final int minOrExpr;
    + public final boolean extract;
    + public final boolean testMode;
    +
    + /*
    + * Pass in configs and pre-create a parse context
    + */
    + public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
    + this.minOrExpr = min;
    + this.extract = extract;
    + this.testMode = testMode;
    + }
    +
    + // Hash Set iteration isn't ordered, but force string sorted order
    + // to get a consistent test run.
    + private Collection<ExprNodeDescEqualityWrapper> sortForTests(
    + Set<ExprNodeDescEqualityWrapper> valuesExpr) {
    + if (!testMode) {
    + // normal case - sorting is wasted for an IN()
    + return valuesExpr;
    + }
    + final Collection<ExprNodeDescEqualityWrapper> sortedValues;
    +
    + sortedValues = ImmutableSortedSet.copyOf(
    + new Comparator<ExprNodeDescEqualityWrapper>() {
    + @Override
    + public int compare(ExprNodeDescEqualityWrapper w1,
    + ExprNodeDescEqualityWrapper w2) {
    + // fail if you find nulls (this is a test-code section)
    + if (w1.equals(w2)) {
    + return 0;
    + }
    + return w1.getExprNodeDesc().getExprString()
    + .compareTo(w2.getExprNodeDesc().getExprString());
    + }
    + }, valuesExpr);

    + return sortedValues;
    + }

        @Override
        public ParseContext transform(ParseContext pctx) throws SemanticException {
    @@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Generated new predicate with IN clause: " + newPredicate);
              }
    - filterOp.getConf().setOrigPredicate(predicate);
    + if (!extract) {
    + filterOp.getConf().setOrigPredicate(predicate);
    + }
              filterOp.getConf().setPredicate(newPredicate);
            }

    @@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
              return null;
            }

    - // 2. It is an OR operator
    + // 2. It is an OR operator with enough children
            List<ExprNodeDesc> children = fd.getChildren();
    + if (children.size() < minOrExpr) {
    + return null;
    + }
            ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
                    ArrayListMultimap.create();
            boolean modeAnd = false;
    @@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
            newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                    FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);

    + if (extract && columns.size() > 1) {
    + final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
    +
    + // extract pre-conditions for the tuple expressions
    + // (a,b) IN ((1,2),(2,3)) ->
    + // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
    +
    + for (String keyString : columnConstantsMap.keySet()) {
    + final Set<ExprNodeDescEqualityWrapper> valuesExpr =
    + new HashSet<ExprNodeDescEqualityWrapper>(children.size());
    + final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
    + columnConstantsMap.get(keyString);
    + for (int i = 0; i < children.size(); i++) {
    + Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
    + .get(i);
    + valuesExpr
    + .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
    + }
    + ExprNodeColumnDesc lookupCol = partial.get(0).left;
    + // generate a partial IN clause, if the column is a partition column
    + if (lookupCol.getIsPartitionColOrVirtualCol()
    + || valuesExpr.size() < children.size()) {
    + // optimize only nDV reductions
    + final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
    + inExpr.add(lookupCol);
    + for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
    + inExpr.add(value.getExprNodeDesc());
    + }
    + subExpr.add(new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
    + }
    + }
    + // loop complete, inspect the sub expressions generated
    + if (subExpr.size() > 0) {
    + // add the newPredicate to the end & produce an AND clause
    + subExpr.add(newPredicate);
    + newPredicate = new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
    + }
    + // else, newPredicate is unmodified
    + }
    +
            return newPredicate;
          }
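
The extract branch above is what produces the third explain plan earlier in this digest: from (key,value) IN ((k1,v1) ... (kn,vn)) it derives a cheaper single-column precondition such as value IN (v1 ... vn), but only when the column is a partition/virtual column or when deduplication actually shrinks the value set, i.e. an nDV reduction. A small sketch of that gating test, using the constants from the digest's own example (illustrative names, not from the patch):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class PartialInExtraction {
      // Illustrative gate: emit a per-column IN() only when deduplication
      // shows an nDV reduction, or the column is a partition column.
      static boolean worthExtracting(List<String> constantsForColumn,
                                     boolean isPartitionCol) {
        Set<String> distinct = new HashSet<>(constantsForColumn);
        return isPartitionCol || distinct.size() < constantsForColumn.size();
      }

      public static void main(String[] args) {
        // the value constants of the 11 disjuncts: only 5 distinct values
        List<String> values = Arrays.asList(
            "8", "5", "6", "8", "1", "6", "1", "1", "1", "1", "3");
        System.out.println(worthExtracting(values, false)); // true: 5 < 11
      }
    }

That 5-out-of-11 reduction is exactly why the third plan carries the extra "(value) IN ('1', '3', '5', '6', '8')" conjunct ahead of the struct IN.
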


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
    index 6d65225..6c6e0f9 100644
    --- a/ql/src/test/queries/clientpositive/flatten_and_or.q
    +++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
    @@ -1,3 +1,5 @@
    +set hive.optimize.point.lookup=false;
    +
      explain
      SELECT key
      FROM src
    @@ -14,4 +16,4 @@ WHERE
         AND value = '1') OR (key = '9'
         AND value = '1') OR (key = '10'
         AND value = '3'))
    -;
    \ No newline at end of file
    +;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
    new file mode 100644
    index 0000000..1aef2ef
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup.q
    @@ -0,0 +1,59 @@
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +
    +set hive.optimize.point.lookup.min=3;
    +set hive.optimize.point.lookup.extract=false;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
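
The three EXPLAINs above step through the optimizer's modes, and pointlookup.q.out below records the resulting filter predicates: with the defaults the OR-of-ANDs disjunction is left untouched; with hive.optimize.point.lookup.min=3 and extract=false it collapses into a single IN over struct(key,value); with extract=true the same IN is additionally guarded by the extracted pre-filter (value) IN ('1', '3', '5', '6', '8').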

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
    new file mode 100644
    index 0000000..31bebbb
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup2.q
    @@ -0,0 +1,51 @@
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    +
    +create table pcr_t1 (key int, value string) partitioned by (ds string);
    +insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
    +
    +create table pcr_t2 (ds string, key int, value string);
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
    +
    +set hive.optimize.point.lookup.min=2;
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds;
    +
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    index 06515da..9fc3c8d 100644
    --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    @@ -1134,15 +1134,11 @@ STAGE PLANS:
                alias: alterdynamic_part_table
                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
    - Filter Operator
    - isSamplingPred: false
    - predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
    + Select Operator
    + expressions: intcol (type: string)
    + outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - Select Operator
    - expressions: intcol (type: string)
    - outputColumnNames: _col0
    - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - ListSink
    + ListSink

      PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
      PREHOOK: type: QUERY

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    index 9e0e78a..054b573 100644
    --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    @@ -678,15 +678,15 @@ STAGE PLANS:
                  alias: loc_orc
                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
    - predicate: (state) IN ('OH', 'CA') (type: boolean)
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                      outputColumnNames: _col0, _col1, _col2, _col3
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    index 5f25daa..9c51ff3 100644
    --- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
    +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    @@ -44,15 +44,15 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string)
                      outputColumnNames: _col0
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
    index 4c9ea77..d7c40a3 100644
    --- a/ql/src/test/results/clientpositive/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/pcr.q.out
    @@ -2475,16 +2475,16 @@ STAGE PLANS:
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string), ds (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                        sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                        tag: -1
                        auto parallelism: false
            Path -> Alias:
    @@ -2588,13 +2588,13 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
      #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
    new file mode 100644
    index 0000000..7e19be4
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup.q.out
    @@ -0,0 +1,198 @@
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
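
Note how the nDV-reduction rule plays out in the last plan above: across the 11 tuples, key takes 11 distinct values (no reduction, so no pre-filter for it), while value takes only 5 ('1', '3', '5', '6', '8'), so only the value pre-filter is extracted and the estimated row count falls from 500 to 125.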
    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
    new file mode 100644
    index 0000000..55edd90
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
    @@ -0,0 +1,1647 @@
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + key
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + value
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + ds
    + TOK_WHERE
    + or
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-08'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 1
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-09'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + key
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + value
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: pcr_t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int), value (type: string), ds (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [pcr_t1]
    + /pcr_t1/ds=2000-04-09 [pcr_t1]
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2
    + columns.types int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1, t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-10
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-10
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t1/ds=2000-04-10 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + sort order: +++
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t2
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t2
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
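
    The golden files above record the same WHERE clause in two shapes: the literal OR of (key, ds) conjunctions written in the query, and the point-lookup form (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) that the plan's Filter Operator shows. As a hedged, self-contained illustration that the two shapes accept the same rows (plain demo Java with invented names, not Hive's predicate evaluator):

        import java.util.Arrays;
        import java.util.List;

        public class StructInDemo {
          // The (key, ds) pairs that the IN(const struct...) form enumerates.
          static final List<Object[]> ACCEPTED = Arrays.asList(
              new Object[] {1, "2000-04-08"},
              new Object[] {2, "2000-04-09"});

          // Literal form: OR of conjunctive equality tests.
          static boolean orOfAnds(int key, String ds) {
            return (key == 1 && "2000-04-08".equals(ds))
                || (key == 2 && "2000-04-09".equals(ds));
          }

          // Point-lookup form: membership test over the enumerated pairs.
          static boolean structIn(int key, String ds) {
            for (Object[] pair : ACCEPTED) {
              if (pair[0].equals(key) && pair[1].equals(ds)) {
                return true;
              }
            }
            return false;
          }

          public static void main(String[] args) {
            for (int key : new int[] {1, 2, 3}) {
              for (String ds : new String[] {"2000-04-08", "2000-04-09", "2000-04-10"}) {
                if (orOfAnds(key, ds) != structIn(key, ds)) {
                  throw new AssertionError("predicates disagree");
                }
              }
            }
          }
        }

    Whether the optimizer keeps the literal ORs or folds them into a struct IN, the accepted row set is the same; the golden files only pin down which shape each plan settles on.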

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
    index f536767..17248e4 100644
    --- a/ql/src/test/results/clientpositive/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
    @@ -390,21 +390,21 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
    index 5aa0df8..fb08f10 100644
    --- a/ql/src/test/results/clientpositive/spark/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
    @@ -2534,16 +2534,16 @@ STAGE PLANS:
                        GatherStats: false
                        Filter Operator
                          isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: int), value (type: string), ds (type: string)
                            outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                              sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                              tag: -1
                              auto parallelism: false
                  Path -> Alias:
    @@ -2648,13 +2648,13 @@ STAGE PLANS:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
                        GlobalTableId: 0
      #### A masked pattern was here ####
                        NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    index a6e6e38..52a847a 100644
    --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    @@ -405,21 +405,21 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                            Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index e8a9786..9756b0c 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -2909,7 +2909,7 @@ Stage-0
            Select Operator [SEL_2]
               outputColumnNames:["_col0"]
               Filter Operator [FIL_4]
    - predicate:(c_int) IN (-6, 6) (type: boolean)
    + predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
                  TableScan [TS_0]
                     alias:cbo_t1


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
    index 9e47014..73bf12d 100644
    --- a/ql/src/test/results/clientpositive/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
    @@ -46,19 +46,20 @@ STAGE PLANS:
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized

        Stage: Stage-0
          Fetch Operator
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11123 : Fix how to confirm the RDBMS product name at Metastore. (Shinichi Yamashita, reviewed by Sergey Shelukhin and Deepesh Khandelwal)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fb152e45
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fb152e45
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fb152e45

    Branch: refs/heads/llap
    Commit: fb152e45064fcb2846b198ba14e7f7cc13ada4bb
    Parents: 9670a2b
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Thu Aug 27 10:54:58 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Thu Aug 27 10:54:58 2015 -0700

    ----------------------------------------------------------------------
      .../hive/metastore/MetaStoreDirectSql.java | 40 +++++++-------------
      1 file changed, 13 insertions(+), 27 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/fb152e45/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
    ----------------------------------------------------------------------
    diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
    index 5776ec6..522fcc2 100644
    --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
    +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
    @@ -148,16 +148,16 @@ class MetaStoreDirectSql {

        private DB determineDbType() {
          DB dbType = DB.OTHER;
    - if (runDbCheck("SET @@session.sql_mode=ANSI_QUOTES", "MySql")) {
    - dbType = DB.MYSQL;
    - } else if (runDbCheck("SELECT version FROM v$instance", "Oracle")) {
    - dbType = DB.ORACLE;
    - } else if (runDbCheck("SELECT @@version", "MSSQL")) {
    - dbType = DB.MSSQL;
    - } else {
    - // TODO: maybe we should use getProductName to identify all the DBs
    - String productName = getProductName();
    - if (productName != null && productName.toLowerCase().contains("derby")) {
    + String productName = getProductName();
    + if (productName != null) {
    + productName = productName.toLowerCase();
    + if (productName.contains("mysql")) {
    + dbType = DB.MYSQL;
    + } else if (productName.contains("oracle")) {
    + dbType = DB.ORACLE;
    + } else if (productName.contains("microsoft sql server")) {
    + dbType = DB.MSSQL;
    + } else if (productName.contains("derby")) {
              dbType = DB.DERBY;
            }
          }
    @@ -210,6 +210,9 @@ class MetaStoreDirectSql {

        private boolean runTestQuery() {
          Transaction tx = pm.currentTransaction();
    + if (!tx.isActive()) {
    + tx.begin();
    + }
          Query query = null;
          // Run a self-test query. If it doesn't work, we will self-disable. What a PITA...
          String selfTestQuery = "select \"DB_ID\" from \"DBS\"";
    @@ -261,23 +264,6 @@ class MetaStoreDirectSql {
          }
        }

    - private boolean runDbCheck(String queryText, String name) {
    - Transaction tx = pm.currentTransaction();
    - if (!tx.isActive()) {
    - tx.begin();
    - }
    - try {
    - executeNoResult(queryText);
    - return true;
    - } catch (Throwable t) {
    - LOG.debug(name + " check failed, assuming we are not on " + name + ": " + t.getMessage());
    - tx.rollback();
    - tx = pm.currentTransaction();
    - tx.begin();
    - return false;
    - }
    - }
    -
        public Database getDatabase(String dbName) throws MetaException{
          Query queryDbSelector = null;
          Query queryDbParams = null;
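
    For context on the hunk above: determineDbType() now keys off the JDBC product name instead of probing each vendor with dialect-specific SQL. A minimal standalone sketch of the same mapping, assuming a plain JDBC Connection; DbTypeSniffer and its DB enum are illustrative names, not Hive classes:

        import java.sql.Connection;
        import java.sql.SQLException;

        class DbTypeSniffer {
          enum DB { MYSQL, ORACLE, MSSQL, DERBY, OTHER }

          // Map DatabaseMetaData.getDatabaseProductName() onto a DB type,
          // mirroring the substring checks in determineDbType() above.
          static DB sniff(Connection conn) {
            try {
              String name = conn.getMetaData().getDatabaseProductName();
              if (name == null) {
                return DB.OTHER;
              }
              name = name.toLowerCase();
              if (name.contains("mysql")) return DB.MYSQL;
              if (name.contains("oracle")) return DB.ORACLE;
              if (name.contains("microsoft sql server")) return DB.MSSQL;
              if (name.contains("derby")) return DB.DERBY;
              return DB.OTHER;
            } catch (SQLException e) {
              return DB.OTHER; // metadata unavailable; keep the default
            }
          }
        }

    One metadata call replaces the three trial queries that runDbCheck() used to fire, along with the rollback-and-begin churn each failed probe caused, which is what the deleted block above removes.
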
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce258168
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce258168
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce258168

    Branch: refs/heads/llap
    Commit: ce2581680f1c109ea0a43868e0345a15b06b41c8
    Parents: b6d1143
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 01:24:32 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 01:24:32 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java | 2 +-
      .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java | 2 +-
      .../vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java | 4 ++--
      .../ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java | 4 ++--
      .../exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java | 4 ++--
      .../ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java | 4 ++--
      6 files changed, 10 insertions(+), 10 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    index 626cea5..aff3551 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    @@ -143,7 +143,7 @@ public class VectorHashKeyWrapper extends KeyWrapper {
          for (int i = 0; i < byteValues.length; ++i) {
            // the byte comparison is potentially expensive so is better to branch on null
            if (!isNull[longValues.length + doubleValues.length + i]) {
    - if (0 != StringExpr.compare(
    + if (!StringExpr.equal(
                  byteValues[i],
                  byteStarts[i],
                  byteLengths[i],

    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    index a21162b..6383e8a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    @@ -82,7 +82,7 @@ public class CuckooSetBytes {
        }

        private static boolean entryEqual(byte[][] t, int hash, byte[] b, int start, int len) {
    - return t[hash] != null && StringExpr.compare(t[hash], 0, t[hash].length, b, start, len) == 0;
    + return t[hash] != null && StringExpr.equal(t[hash], 0, t[hash].length, b, start, len);
        }

        public void insert(byte[] x) {
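
    The compare()-to-equal() swap is not just cosmetic: an equality test can reject on mismatched lengths before reading a single byte, while a three-way compare has to scan bytes to produce an ordering. A hedged sketch of that fast path (illustrative code, not the actual StringExpr internals):

        class BytesEq {
          // Range-based byte equality with a length fast path.
          static boolean equal(byte[] a, int aStart, int aLen,
                               byte[] b, int bStart, int bLen) {
            if (aLen != bLen) {
              return false; // lengths differ: no byte reads needed
            }
            for (int i = 0; i < aLen; i++) {
              if (a[aStart + i] != b[bStart + i]) {
                return false;
              }
            }
            return true;
          }
        }

    Every call site in this commit only ever needed a yes/no answer (compare(...) == 0 or != 0), so none of the ordering work was being used.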

    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    index 87a11c0..9f2d4c3 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    @@ -234,8 +234,8 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
                 */

                if (!haveSaveKey ||
    - StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    - vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    + StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    + vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    index 9f10ff1..5a5d54f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    @@ -229,8 +229,8 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
                 */

                if (!haveSaveKey ||
    - StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    - vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    + StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    + vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    index 9ff1141..e9ce739 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    @@ -230,8 +230,8 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
                 */

                if (!haveSaveKey ||
    - StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    - vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    + StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    + vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    index 49efe1a..dfdd6d7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    @@ -290,8 +290,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
                   */

                  if (!haveSaveKey ||
    - StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    - vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    + StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    + vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                    // New key.

                    if (haveSaveKey) {
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e85bbf2
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e85bbf2
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e85bbf2

    Branch: refs/heads/llap
    Commit: 9e85bbf2780510edda79c247248da57619530577
    Parents: fb152e4
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Thu Aug 27 11:26:25 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Thu Aug 27 11:26:46 2015 -0700

    ----------------------------------------------------------------------
      .../calcite/translator/HiveOpConverter.java | 31 ++++++++++++++++----
      1 file changed, 26 insertions(+), 5 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9e85bbf2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    index 4db9863..1931880 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    @@ -686,13 +686,34 @@ public class HiveOpConverter {
            int numReducers, Operation acidOperation, boolean strictMode,
            List<String> keepColNames) throws SemanticException {
          // 1. Generate RS operator
    - if (input.getSchema().getTableNames().size() != 1) {
    + // 1.1 Prune the tableNames, only count the tableNames that are not empty strings
    + // as empty string in table aliases is only allowed for virtual columns.
    + String tableAlias = null;
    + Set<String> tableNames = input.getSchema().getTableNames();
    + for (String tableName : tableNames) {
    + if (tableName != null) {
    + if (tableName.length() == 0) {
    + if (tableAlias == null) {
    + tableAlias = tableName;
    + }
    + } else {
    + if (tableAlias == null || tableAlias.length() == 0) {
    + tableAlias = tableName;
    + } else {
    + if (!tableName.equals(tableAlias)) {
    + throw new SemanticException(
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
    + }
    + }
    + }
    + }
    + }
    + if (tableAlias == null) {
            throw new SemanticException(
    - "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one SelectOp but there is "
    - + input.getSchema().getTableNames().size());
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
          }
    - ReduceSinkOperator rsOp = genReduceSink(input, input.getSchema().getTableNames().iterator()
    - .next(), keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
    + // 1.2 Now generate RS operator
    + ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);

          // 2. Generate backtrack Select operator
          Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames,
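
    Pulled out of the hunk above, the alias-selection rule is: skip nulls, let an empty-string alias (allowed only for virtual columns) stand in as a fallback, prefer the first non-empty alias, and treat two distinct non-empty aliases as an error. A hedged standalone rendering; pickAlias is an invented name, and Hive raises SemanticException where this sketch uses IllegalStateException:

        import java.util.Set;

        class AliasPicker {
          static String pickAlias(Set<String> tableNames) {
            String alias = null;
            for (String name : tableNames) {
              if (name == null) {
                continue;
              }
              if (name.isEmpty()) {
                if (alias == null) {
                  alias = name; // virtual-column alias: fallback only
                }
              } else if (alias == null || alias.isEmpty()) {
                alias = name; // first real alias wins over the fallback
              } else if (!name.equals(alias)) {
                throw new IllegalStateException(
                    "expecting only one tableAlias but there is more than one");
              }
            }
            return alias; // null means no alias at all; the caller errors out
          }
        }
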
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b6d1143a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b6d1143a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b6d1143a

    Branch: refs/heads/llap
    Commit: b6d1143aa7aaa20de035898f34df2d6b581895b6
    Parents: d147a79
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 01:22:45 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 01:22:45 2015 -0700

    ----------------------------------------------------------------------
      .../optimizer/DynamicPartitionPruningOptimization.java | 12 ++++++++++++
      1 file changed, 12 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b6d1143a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    index f475926..5ebd28a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    @@ -189,6 +189,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
            LOG.debug("TableScan: " + ts);
          }

    + if (ts == null) {
    + // could be a reduce sink
    + LOG.warn("Could not find the table scan for " + filter);
    + return null;
    + } else {
    + Table table = ts.getConf().getTableMetadata();
    + if (table != null && !table.isPartitioned()) {
    + // table is not partitioned, skip optimizer
    + return null;
    + }
    + }
    +
          // collect the dynamic pruning conditions
          removerContext.dynLists.clear();
          walkExprTree(desc.getPredicate(), removerContext);
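
    The shape of the fix is a cheap guard in front of an expensive walk: the null and isPartitioned() checks are constant-time, while walking the predicate can be costly for a very large IN() expression, so the checks run first and return early. A hedged skeleton of that ordering, with stub types and invented names rather than the real NodeProcessor signature:

        class PruningFastPath {
          static class Table {
            private final boolean partitioned;
            Table(boolean partitioned) { this.partitioned = partitioned; }
            boolean isPartitioned() { return partitioned; }
          }

          // Decide up front whether the expensive expression walk can pay off.
          static boolean shouldWalkExprTree(Table table) {
            if (table == null) {
              return false; // no table scan found; nothing to prune
            }
            if (!table.isPartitioned()) {
              return false; // unpartitioned table: pruning is a no-op
            }
            return true; // only now pay for walking the predicate tree
          }
        }
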
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11682: LLAP: Merge master into branch (Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/02576538
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/02576538
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/02576538

    Branch: refs/heads/llap
    Commit: 02576538282b9d1a0454f268b7ef181ee259184d
    Parents: 387fed2 2ef40ca
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Fri Aug 28 13:07:12 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Fri Aug 28 13:07:12 2015 -0500

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
      data/conf/tez/hive-site.xml | 20 +-
      .../hive/metastore/MetaStoreDirectSql.java | 40 +-
      .../ql/exec/vector/VectorHashKeyWrapper.java | 2 +-
      .../exec/vector/expressions/CuckooSetBytes.java | 2 +-
      ...VectorMapJoinInnerBigOnlyStringOperator.java | 4 +-
      .../VectorMapJoinInnerStringOperator.java | 4 +-
      .../VectorMapJoinLeftSemiStringOperator.java | 4 +-
      .../VectorMapJoinOuterStringOperator.java | 4 +-
      .../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
      .../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
      .../calcite/translator/HiveOpConverter.java | 31 +-
      .../hadoop/hive/ql/parse/LeadLagInfo.java | 4 +-
      .../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +-
      .../hadoop/hive/ql/plan/ExprNodeDesc.java | 8 +
      .../hadoop/hive/ql/ppd/ExprWalkerInfo.java | 136 +-
      .../hive/ql/ppd/ExprWalkerProcFactory.java | 92 +-
      .../hadoop/hive/ql/ppd/OpProcFactory.java | 11 +-
      ql/src/main/resources/tez-container-log4j2.xml | 49 +
      .../queries/clientnegative/load_orc_negative3.q | 6 +
      .../queries/clientpositive/flatten_and_or.q | 4 +-
      .../test/queries/clientpositive/load_orc_part.q | 4 +
      .../test/queries/clientpositive/pointlookup.q | 59 +
      .../test/queries/clientpositive/pointlookup2.q | 51 +
      .../clientnegative/load_orc_negative3.q.out | 25 +
      .../alter_partition_coltype.q.out | 12 +-
      .../clientpositive/annotate_stats_filter.q.out | 8 +-
      .../results/clientpositive/flatten_and_or.q.out | 8 +-
      .../results/clientpositive/load_orc_part.q.out | 18 +
      ql/src/test/results/clientpositive/pcr.q.out | 12 +-
      .../results/clientpositive/pointlookup.q.out | 198 +++
      .../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
      .../results/clientpositive/ppd_transform.q.out | 12 +-
      .../test/results/clientpositive/spark/pcr.q.out | 12 +-
      .../clientpositive/spark/ppd_transform.q.out | 12 +-
      .../clientpositive/spark/vectorized_case.q.out | 2 +-
      .../clientpositive/tez/explainuser_1.q.out | 2 +-
      .../clientpositive/tez/vectorized_case.q.out | 2 +-
      .../clientpositive/vectorized_case.q.out | 9 +-
      39 files changed, 2404 insertions(+), 271 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/02576538/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11627: Reduce the number of accesses to hashmaps in PPD (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d147a79c
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d147a79c
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d147a79c

    Branch: refs/heads/llap
    Commit: d147a79c13a9fdde83372c740167236eb80962de
    Parents: 607b0e8
    Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Authored: Thu Aug 27 18:07:48 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Fri Aug 28 10:09:38 2015 +0200

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/ppd/ExprWalkerInfo.java | 127 ++++---------------
      .../hive/ql/ppd/ExprWalkerProcFactory.java | 92 +++++++++-----
      .../hadoop/hive/ql/ppd/OpProcFactory.java | 11 +-
      3 files changed, 93 insertions(+), 137 deletions(-)
    ----------------------------------------------------------------------
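
    Note on the change: the patch below collapses ExprWalkerInfo's per-attribute
    accessors (getAlias, isCandidate, getConvertedNode, each doing its own
    exprInfoMap lookup) into a single lookup that returns a mutable ExprInfo
    holder whose fields are then read and written directly. A self-contained
    sketch of the pattern, using toy names rather than the Hive classes:

      import java.util.IdentityHashMap;
      import java.util.Map;

      public class OneLookupSketch {
        static class Info { boolean isCandidate; String alias; }

        static final Map<Object, Info> infoMap = new IdentityHashMap<>();

        // analogue of addOrGetExprInfo below: at most one get() plus one put()
        static Info addOrGet(Object expr) {
          Info info = infoMap.get(expr);
          if (info == null) {
            info = new Info();
            infoMap.put(expr, info);
          }
          return info;
        }

        public static void main(String[] args) {
          Object expr = new Object();
          // before: setIsCandidate(expr, true) and addAlias(expr, "t1")
          // cost two independent map lookups; after: one lookup, two field writes
          Info info = addOrGet(expr);
          info.isCandidate = true;
          info.alias = "t1";
          System.out.println(info.alias + " " + info.isCandidate);
        }
      }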


    http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    index f23facf..e4b768e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    @@ -38,29 +38,21 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
      public class ExprWalkerInfo implements NodeProcessorCtx {

        /** Information maintained for an expr while walking an expr tree. */
    - private static class ExprInfo {
    + protected class ExprInfo {
          /**
           * true if expr rooted at this node doesn't contain more than one table.
           * alias
           */
    - public boolean isCandidate = false;
    + protected boolean isCandidate = false;
          /** alias that this expression refers to. */
    - public String alias = null;
    + protected String alias = null;
          /** new expr for this expression. */
    - public ExprNodeDesc convertedExpr = null;
    + protected ExprNodeDesc convertedExpr = null;

    - public ExprInfo() {
    - }

    - public ExprInfo(boolean isCandidate, String alias, ExprNodeDesc replacedNode) {
    - this.isCandidate = isCandidate;
    - this.alias = alias;
    - convertedExpr = replacedNode;
    - }
        }

    - protected static final Log LOG = LogFactory.getLog(OpProcFactory.class
    - .getName());;
    + protected static final Log LOG = LogFactory.getLog(OpProcFactory.class.getName());
        private Operator<? extends OperatorDesc> op = null;

        /**
    @@ -127,105 +119,33 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
        }

        /**
    - * @return converted expression for give node. If there is none then returns
    - * null.
    - */
    - public ExprNodeDesc getConvertedNode(ExprNodeDesc nd) {
    - ExprInfo ei = exprInfoMap.get(nd);
    - if (ei == null) {
    - return null;
    - }
    - return ei.convertedExpr;
    - }
    -
    - /**
    - * adds a replacement node for this expression.
    - *
    - * @param oldNode
    - * original node
    - * @param newNode
    - * new node
    + * Get additional info for a given expression node
         */
    - public void addConvertedNode(ExprNodeDesc oldNode, ExprNodeDesc newNode) {
    - ExprInfo ei = exprInfoMap.get(oldNode);
    - if (ei == null) {
    - ei = new ExprInfo();
    - exprInfoMap.put(oldNode, ei);
    - }
    - ei.convertedExpr = newNode;
    - exprInfoMap.put(newNode, new ExprInfo(ei.isCandidate, ei.alias, null));
    + public ExprInfo getExprInfo(ExprNodeDesc expr) {
    + return exprInfoMap.get(expr);
        }

        /**
    - * Returns true if the specified expression is pushdown candidate else false.
    - *
    - * @param expr
    - * @return true or false
    + * Get additional info for a given expression node if it
    + * exists, or create a new one and store it if it does not
         */
    - public boolean isCandidate(ExprNodeDesc expr) {
    - ExprInfo ei = exprInfoMap.get(expr);
    - if (ei == null) {
    - return false;
    - }
    - return ei.isCandidate;
    + public ExprInfo addExprInfo(ExprNodeDesc expr) {
    + ExprInfo exprInfo = new ExprInfo();
    + exprInfoMap.put(expr, exprInfo);
    + return exprInfo;
        }

        /**
    - * Marks the specified expr to the specified value.
    - *
    - * @param expr
    - * @param b
    - * can
    + * Get additional info for a given expression node if it
    + * exists, or create a new one and store it if it does not
         */
    - public void setIsCandidate(ExprNodeDesc expr, boolean b) {
    - ExprInfo ei = exprInfoMap.get(expr);
    - if (ei == null) {
    - ei = new ExprInfo();
    - exprInfoMap.put(expr, ei);
    + public ExprInfo addOrGetExprInfo(ExprNodeDesc expr) {
    + ExprInfo exprInfo = exprInfoMap.get(expr);
    + if (exprInfo == null) {
    + exprInfo = new ExprInfo();
    + exprInfoMap.put(expr, exprInfo);
          }
    - ei.isCandidate = b;
    - }
    -
    - /**
    - * Returns the alias of the specified expr.
    - *
    - * @param expr
    - * @return The alias of the expression
    - */
    - public String getAlias(ExprNodeDesc expr) {
    - ExprInfo ei = exprInfoMap.get(expr);
    - if (ei == null) {
    - return null;
    - }
    - return ei.alias;
    - }
    -
    - /**
    - * Adds the specified alias to the specified expr.
    - *
    - * @param expr
    - * @param alias
    - */
    - public void addAlias(ExprNodeDesc expr, String alias) {
    - if (alias == null) {
    - return;
    - }
    - ExprInfo ei = exprInfoMap.get(expr);
    - if (ei == null) {
    - ei = new ExprInfo();
    - exprInfoMap.put(expr, ei);
    - }
    - ei.alias = alias;
    - }
    -
    - /**
    - * Adds the specified expr as the top-most pushdown expr (ie all its children
    - * can be pushed).
    - *
    - * @param expr
    - */
    - public void addFinalCandidate(ExprNodeDesc expr) {
    - addFinalCandidate(getAlias(expr), expr);
    + return exprInfo;
        }

        public void addFinalCandidate(String alias, ExprNodeDesc expr) {
    @@ -278,8 +198,7 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
         *
         * @param expr
         */
    - public void addNonFinalCandidate(ExprNodeDesc expr) {
    - String alias = getAlias(expr);
    + public void addNonFinalCandidate(String alias, ExprNodeDesc expr) {
          if (nonFinalPreds.get(alias) == null) {
            nonFinalPreds.put(alias, new ArrayList<ExprNodeDesc>());
          }
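
    For callers, the removed accessors map onto the new API roughly as in this
    sketch (the method itself is illustrative only; names come from the diff
    above, and same-package access is assumed since ExprInfo's fields are
    protected):

      static ExprNodeDesc migrate(ExprWalkerInfo ctx, ExprNodeDesc expr, String alias) {
        // before: ctx.setIsCandidate(expr, true); ctx.addAlias(expr, alias);
        ExprInfo info = ctx.addOrGetExprInfo(expr); // single exprInfoMap access
        info.isCandidate = true;
        if (alias != null) {
          info.alias = alias; // null check moves to the caller, as old addAlias did internally
        }
        // before: ExprNodeDesc conv = ctx.getConvertedNode(expr);
        ExprInfo lookup = ctx.getExprInfo(expr);
        return lookup == null ? null : lookup.convertedExpr;
      }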

    http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    index 6a1bef9..64efbdd 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    @@ -38,8 +38,6 @@ import org.apache.hadoop.hive.ql.lib.Node;
      import org.apache.hadoop.hive.ql.lib.NodeProcessor;
      import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
      import org.apache.hadoop.hive.ql.lib.Rule;
    -import org.apache.hadoop.hive.ql.lib.RuleExactMatch;
    -import org.apache.hadoop.hive.ql.lib.RuleRegExp;
      import org.apache.hadoop.hive.ql.lib.TypeRule;
      import org.apache.hadoop.hive.ql.parse.SemanticException;
      import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    @@ -47,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
      import org.apache.hadoop.hive.ql.plan.OperatorDesc;
    +import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;

      /**
       * Expression factory for predicate pushdown processing. Each processor
    @@ -55,8 +54,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
       */
      public final class ExprWalkerProcFactory {

    - private static final Log LOG = LogFactory
    - .getLog(ExprWalkerProcFactory.class.getName());
    + private static final Log LOG = LogFactory.getLog(ExprWalkerProcFactory.class.getName());

        /**
         * ColumnExprProcessor.
    @@ -80,6 +78,7 @@ public final class ExprWalkerProcFactory {
              tabAlias = ci.getTabAlias();
            }

    + ExprInfo colExprInfo = null;
            boolean isCandidate = true;
            if (op.getColumnExprMap() != null) {
              // replace the output expression with the input expression so that
    @@ -88,7 +87,8 @@ public final class ExprWalkerProcFactory {
              if (exp == null) {
                // means that expression can't be pushed either because it is value in
                // group by
    - ctx.setIsCandidate(colref, false);
    + colExprInfo = ctx.addOrGetExprInfo(colref);
    + colExprInfo.isCandidate = false;
                return false;
              } else {
                if (exp instanceof ExprNodeGenericFuncDesc) {
    @@ -99,16 +99,25 @@ public final class ExprWalkerProcFactory {
                  tabAlias = column.getTabAlias();
                }
              }
    - ctx.addConvertedNode(colref, exp);
    - ctx.setIsCandidate(exp, isCandidate);
    - ctx.addAlias(exp, tabAlias);
    + colExprInfo = ctx.addOrGetExprInfo(colref);
    + colExprInfo.convertedExpr = exp;
    + ExprInfo expInfo = ctx.addExprInfo(exp);
    + expInfo.isCandidate = isCandidate;
    + if (tabAlias != null) {
    + expInfo.alias = tabAlias;
    + } else {
    + expInfo.alias = colExprInfo.alias;
    + }
            } else {
              if (ci == null) {
                return false;
              }
    - ctx.addAlias(colref, tabAlias);
    + colExprInfo = ctx.addOrGetExprInfo(colref);
    + if (tabAlias != null) {
    + colExprInfo.alias = tabAlias;
    + }
            }
    - ctx.setIsCandidate(colref, isCandidate);
    + colExprInfo.isCandidate = isCandidate;
            return isCandidate;
          }

    @@ -127,30 +136,37 @@ public final class ExprWalkerProcFactory {
            String alias = null;
            ExprNodeFieldDesc expr = (ExprNodeFieldDesc) nd;

    - boolean isCandidate = true;
            assert (nd.getChildren().size() == 1);
            ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(0);
    - ExprNodeDesc newCh = ctx.getConvertedNode(ch);
    + ExprInfo chExprInfo = ctx.getExprInfo(ch);
    + ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
            if (newCh != null) {
              expr.setDesc(newCh);
              ch = newCh;
    + chExprInfo = ctx.getExprInfo(ch);
            }
    - String chAlias = ctx.getAlias(ch);

    - isCandidate = isCandidate && ctx.isCandidate(ch);
    + boolean isCandidate;
    + String chAlias;
    + if (chExprInfo != null) {
    + chAlias = chExprInfo.alias;
    + isCandidate = chExprInfo.isCandidate;
    + } else {
    + chAlias = null;
    + isCandidate = false;
    + }
            // need to iterate through all children even if one is found to be not a
            // candidate
            // in case if the other children could be individually pushed up
            if (isCandidate && chAlias != null) {
    - if (alias == null) {
    - alias = chAlias;
    - } else if (!chAlias.equalsIgnoreCase(alias)) {
    - isCandidate = false;
    - }
    + alias = chAlias;
            }

    - ctx.addAlias(expr, alias);
    - ctx.setIsCandidate(expr, isCandidate);
    + ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
    + if (alias != null) {
    + exprInfo.alias = alias;
    + }
    + exprInfo.isCandidate = isCandidate;
            return isCandidate;
          }

    @@ -172,7 +188,8 @@ public final class ExprWalkerProcFactory {

            if (!FunctionRegistry.isDeterministic(expr.getGenericUDF())) {
              // this GenericUDF can't be pushed down
    - ctx.setIsCandidate(expr, false);
    + ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
    + exprInfo.isCandidate = false;
              ctx.setDeterministic(false);
              return false;
            }
    @@ -180,14 +197,22 @@ public final class ExprWalkerProcFactory {
            boolean isCandidate = true;
            for (int i = 0; i < nd.getChildren().size(); i++) {
              ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(i);
    - ExprNodeDesc newCh = ctx.getConvertedNode(ch);
    + ExprInfo chExprInfo = ctx.getExprInfo(ch);
    + ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
              if (newCh != null) {
                expr.getChildren().set(i, newCh);
                ch = newCh;
    + chExprInfo = ctx.getExprInfo(ch);
              }
    - String chAlias = ctx.getAlias(ch);

    - isCandidate = isCandidate && ctx.isCandidate(ch);
    + String chAlias;
    + if (chExprInfo != null) {
    + chAlias = chExprInfo.alias;
    + isCandidate = isCandidate && chExprInfo.isCandidate;
    + } else {
    + chAlias = null;
    + isCandidate = false;
    + }
              // need to iterate through all children even if one is found to be not a
              // candidate
              // in case if the other children could be individually pushed up
    @@ -203,8 +228,11 @@ public final class ExprWalkerProcFactory {
                break;
              }
            }
    - ctx.addAlias(expr, alias);
    - ctx.setIsCandidate(expr, isCandidate);
    + ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
    + if (alias != null) {
    + exprInfo.alias = alias;
    + }
    + exprInfo.isCandidate = isCandidate;
            return isCandidate;
          }

    @@ -219,7 +247,8 @@ public final class ExprWalkerProcFactory {
          public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
              Object... nodeOutputs) throws SemanticException {
            ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
    - ctx.setIsCandidate((ExprNodeDesc) nd, true);
    + ExprInfo exprInfo = ctx.addOrGetExprInfo((ExprNodeDesc) nd);
    + exprInfo.isCandidate = true;
            return true;
          }
        }
    @@ -324,12 +353,13 @@ public final class ExprWalkerProcFactory {
            return;
          }

    - if (ctx.isCandidate(expr)) {
    - ctx.addFinalCandidate(expr);
    + ExprInfo exprInfo = ctx.getExprInfo(expr);
    + if (exprInfo != null && exprInfo.isCandidate) {
    + ctx.addFinalCandidate(exprInfo.alias, expr);
            return;
          } else if (!FunctionRegistry.isOpAnd(expr) &&
              HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    - ctx.addNonFinalCandidate(expr);
    + ctx.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
          }
        }
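
    A recurring idiom in the hunks above: fetch the child's ExprInfo once,
    substitute its convertedExpr if present, re-fetch the info for the
    replacement node, and fall back to safe defaults when no info exists.
    A standalone sketch of that flow with toy types (not the Hive classes):

      import java.util.IdentityHashMap;
      import java.util.Map;

      public class ConvertedNodeSketch {
        static class Info { boolean isCandidate; String alias; Object convertedExpr; }

        public static void main(String[] args) {
          Map<Object, Info> infoMap = new IdentityHashMap<>();
          Object child = new Object(), replacement = new Object();

          Info childInfo = new Info();
          childInfo.convertedExpr = replacement; // child was rewritten earlier
          infoMap.put(child, childInfo);

          Info replInfo = new Info();
          replInfo.isCandidate = true;
          replInfo.alias = "t1";
          infoMap.put(replacement, replInfo);

          // one lookup; follow the converted node; re-fetch its info
          Object node = child;
          Info info = infoMap.get(node);
          if (info != null && info.convertedExpr != null) {
            node = info.convertedExpr;
            info = infoMap.get(node);
          }
          boolean isCandidate = info != null && info.isCandidate; // null-safe defaults
          String alias = info == null ? null : info.alias;
          System.out.println("candidate=" + isCandidate + " alias=" + alias);
        }
      }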


    http://git-wip-us.apache.org/repos/asf/hive/blob/d147a79c/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    index 6f9df53..dbd021b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    @@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.ValueBoundaryDef;
      import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
      import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
      import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
    +import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank.GenericUDAFDenseRankEvaluator;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLead.GenericUDAFLeadEvaluator;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank.GenericUDAFRankEvaluator;
    @@ -483,8 +484,14 @@ public final class OpProcFactory {
                  prunePreds.getFinalCandidates().get(alias)) {
                  // add expr to the list of predicates rejected from further pushing
                  // so that we know to add it in createFilter()
    - prunePreds.addAlias(expr, alias);
    - prunePreds.addNonFinalCandidate(expr);
    + ExprInfo exprInfo;
    + if (alias != null) {
    + exprInfo = prunePreds.addOrGetExprInfo(expr);
    + exprInfo.alias = alias;
    + } else {
    + exprInfo = prunePreds.getExprInfo(expr);
    + }
    + prunePreds.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
                }
                prunePreds.getFinalCandidates().remove(alias);
              }
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    Revert "HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)"

    This reverts commit b6d1143aa7aaa20de035898f34df2d6b581895b6.


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ef40ca6
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ef40ca6
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ef40ca6

    Branch: refs/heads/llap
    Commit: 2ef40ca66ab0b9fbcf9bca5e6b8c5d7bd6d580c6
    Parents: ce25816
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 02:43:44 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 02:43:44 2015 -0700

    ----------------------------------------------------------------------
      .../optimizer/DynamicPartitionPruningOptimization.java | 12 ------------
      1 file changed, 12 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/2ef40ca6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    index 5ebd28a..f475926 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    @@ -189,18 +189,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
            LOG.debug("TableScan: " + ts);
          }

    - if (ts == null) {
    - // could be a reduce sink
    - LOG.warn("Could not find the table scan for " + filter);
    - return null;
    - } else {
    - Table table = ts.getConf().getTableMetadata();
    - if (table != null && !table.isPartitioned()) {
    - // table is not partitioned, skip optimizer
    - return null;
    - }
    - }
    -
          // collect the dynamic pruning conditions
          removerContext.dynLists.clear();
          walkExprTree(desc.getPredicate(), removerContext);
  • Prasanthj at Aug 28, 2015 at 6:11 pm
    HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/607b0e8a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/607b0e8a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/607b0e8a

    Branch: refs/heads/llap
    Commit: 607b0e8a6b4da164606b87c4d012059276b3a994
    Parents: 9e85bbf
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 17:14:17 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 17:14:17 2015 -0500

    ----------------------------------------------------------------------
      data/conf/tez/hive-site.xml | 4 ++
      ql/src/main/resources/tez-container-log4j2.xml | 49 +++++++++++++++++++++
      2 files changed, 53 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/data/conf/tez/hive-site.xml
    ----------------------------------------------------------------------
    diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
    index e0238aa..2f9415a 100644
    --- a/data/conf/tez/hive-site.xml
    +++ b/data/conf/tez/hive-site.xml
    @@ -253,5 +253,9 @@
        </description>
      </property>

    +<property>
    + <name>hive.tez.java.opts</name>
    + <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
    +</property>

      </configuration>

    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/ql/src/main/resources/tez-container-log4j2.xml
    ----------------------------------------------------------------------
    diff --git a/ql/src/main/resources/tez-container-log4j2.xml b/ql/src/main/resources/tez-container-log4j2.xml
    new file mode 100644
    index 0000000..be949dc
    --- /dev/null
    +++ b/ql/src/main/resources/tez-container-log4j2.xml
    @@ -0,0 +1,49 @@
    +<?xml version="1.0" encoding="UTF-8"?>
    +<!--
    + Licensed to the Apache Software Foundation (ASF) under one or more
    + contributor license agreements. See the NOTICE file distributed with
    + this work for additional information regarding copyright ownership.
    + The ASF licenses this file to You under the Apache License, Version 2.0
    + (the "License"); you may not use this file except in compliance with
    + the License. You may obtain a copy of the License at
    +
    + http://www.apache.org/licenses/LICENSE-2.0
    +
    + Unless required by applicable law or agreed to in writing, software
    + distributed under the License is distributed on an "AS IS" BASIS,
    + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + See the License for the specific language governing permissions and
    + limitations under the License.
    +-->
    +
    +<Configuration status="trace" strict="true" name="TezContainerLog4j2"
    + packages="org.apache.hadoop.hive.ql.log">
    +
    + <Properties>
    + <Property name="tez.container.log.threshold">ALL</Property>
    + <Property name="tez.container.log.level">INFO</Property>
    + <Property name="tez.container.root.logger">CLA</Property>
    + <Property name="tez.container.log.dir">${sys:yarn.app.container.log.dir}</Property>
    + <Property name="tez.container.log.file">syslog</Property>
    + </Properties>
    +
    + <Appenders>
    + <RollingFile name="CLA" fileName="${sys:tez.container.log.dir}/${sys:tez.container.log.file}"
    + filePattern="${sys:tez.container.log.dir}/${sys:tez.container.log.file}.%d{yyyy-MM-dd}">
    + <PatternLayout pattern="%d{ISO8601} %p [%t] %c{2}: %m%n" />
    + <Policies>
    + <!-- rollover at midnight (interval = 1 means daily) -->
    + <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
    + </Policies>
    + <!-- 30-day backup -->
    + <!-- <DefaultRolloverStrategy max="30"/> -->
    + </RollingFile>
    + </Appenders>
    +
    + <Loggers>
    + <Root level="${sys:tez.container.log.threshold}">
    + <AppenderRef ref="${sys:tez.container.root.logger}" level="${sys:tez.container.log.level}"/>
    + </Root>
    + </Loggers>
    +
    +</Configuration>
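
    How the two pieces connect: hive.tez.java.opts passes -D flags to the
    container JVM, and each ${sys:...} reference in the file above resolves
    against those JVM system properties. A minimal illustration of that lookup
    (plain Java system-property semantics; no Hive or log4j2 code involved):

      public class SysPropLookupSketch {
        public static void main(String[] args) {
          // stands in for -Dtez.container.log.level=INFO from hive.tez.java.opts
          System.setProperty("tez.container.log.level", "INFO");
          String level = System.getProperty("tez.container.log.level");
          // default used if the -D flag were absent
          String logger = System.getProperty("tez.container.root.logger", "CLA");
          System.out.println("appender=" + logger + " level=" + level);
        }
      }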
