Repository: hive
Updated Branches:
   refs/heads/beeline-cli b90a2ae5d -> fc53e5d7e


HIVE-11366: Avoid right leaning tree hashCode depth in ExprNodeDescEqualityWrapper hashmaps (Gopal V, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e63fc42
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e63fc42
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e63fc42

Branch: refs/heads/beeline-cli
Commit: 3e63fc42054e64455d90ca74cb6ebe30ea0d40ae
Parents: 284859c
Author: Gopal V <gopalv@apache.org>
Authored: Tue Aug 25 14:14:08 2015 -0700
Committer: Gopal V <gopalv@apache.org>
Committed: Tue Aug 25 14:14:08 2015 -0700

----------------------------------------------------------------------
  .../org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java | 15 ++++++++-------
  1 file changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3e63fc42/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
index 0fe9eda..15267b9 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
@@ -98,21 +98,22 @@ public abstract class ExprNodeDesc implements Serializable, Node {

    // This wraps an instance of an ExprNodeDesc, and makes equals work like isSame, see comment on
    // isSame
-  public static class ExprNodeDescEqualityWrapper {
-    private ExprNodeDesc exprNodeDesc;
+  public final static class ExprNodeDescEqualityWrapper {
+    private final ExprNodeDesc exprNodeDesc;
+    // beware of any implementation whose hashcode is mutable by reference
+    // inserting into a Map and then changing the hashcode can make it
+    // disappear out of the Map during lookups
+    private final int hashcode;

      public ExprNodeDescEqualityWrapper(ExprNodeDesc exprNodeDesc) {
        this.exprNodeDesc = exprNodeDesc;
+      this.hashcode = exprNodeDesc == null ? 0 : exprNodeDesc.hashCode();
      }

      public ExprNodeDesc getExprNodeDesc() {
        return exprNodeDesc;
      }

-    public void setExprNodeDesc(ExprNodeDesc exprNodeDesc) {
-      this.exprNodeDesc = exprNodeDesc;
-    }
-
      @Override
      public boolean equals(Object other) {

@@ -125,7 +126,7 @@ public abstract class ExprNodeDesc implements Serializable, Node {

      @Override
      public int hashCode() {
-      return exprNodeDesc == null ? 0 : exprNodeDesc.hashCode();
+      return hashcode;
      }

      /* helper function to allow Set()/Collection() operations with ExprNodeDesc */

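The pattern behind this fix is worth spelling out: the wrapper is made immutable and the expression tree's potentially deep hashCode() is computed exactly once, at construction, instead of on every HashMap probe; the removed setter is what previously allowed a wrapped key's hash to change while it sat in a map. A minimal standalone sketch of that caching-wrapper idiom (generic names, not Hive classes):

// Minimal sketch (not Hive code): cache an expensive hashCode() at construction
// and keep the wrapper immutable, so the cached value can never go stale while
// the wrapper is sitting inside a HashMap or HashSet.
import java.util.HashSet;
import java.util.Set;

final class CachedHashWrapper<T> {
  private final T value;
  private final int hashcode;            // computed exactly once

  CachedHashWrapper(T value) {
    this.value = value;
    this.hashcode = (value == null) ? 0 : value.hashCode();
  }

  T get() {
    return value;
  }

  @Override
  public boolean equals(Object other) {
    if (!(other instanceof CachedHashWrapper)) {
      return false;
    }
    Object o = ((CachedHashWrapper<?>) other).value;
    return (value == null) ? (o == null) : value.equals(o);
  }

  @Override
  public int hashCode() {
    return hashcode;                      // no deep tree walk on every map probe
  }

  public static void main(String[] args) {
    Set<CachedHashWrapper<String>> exprs = new HashSet<>();
    exprs.add(new CachedHashWrapper<>("(a + b)"));
    System.out.println(exprs.contains(new CachedHashWrapper<>("(a + b)")));   // true
  }
}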

  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11659: Make Vectorization use the fast StringExpr (Gopal V, reviewed by Matt McCline)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce258168
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce258168
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce258168

    Branch: refs/heads/beeline-cli
    Commit: ce2581680f1c109ea0a43868e0345a15b06b41c8
    Parents: b6d1143
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 01:24:32 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 01:24:32 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java | 2 +-
      .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java | 2 +-
      .../vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java | 4 ++--
      .../ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java | 4 ++--
      .../exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java | 4 ++--
      .../ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java | 4 ++--
      6 files changed, 10 insertions(+), 10 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    index 626cea5..aff3551 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    @@ -143,7 +143,7 @@ public class VectorHashKeyWrapper extends KeyWrapper {
          for (int i = 0; i < byteValues.length; ++i) {
            // the byte comparison is potentially expensive so is better to branch on null
            if (!isNull[longValues.length + doubleValues.length + i]) {
    -        if (0 != StringExpr.compare(
    +        if (!StringExpr.equal(
                  byteValues[i],
                  byteStarts[i],
                  byteLengths[i],

    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    index a21162b..6383e8a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
    @@ -82,7 +82,7 @@ public class CuckooSetBytes {
        }

        private static boolean entryEqual(byte[][] t, int hash, byte[] b, int start, int len) {
    -    return t[hash] != null && StringExpr.compare(t[hash], 0, t[hash].length, b, start, len) == 0;
    +    return t[hash] != null && StringExpr.equal(t[hash], 0, t[hash].length, b, start, len);
        }

        public void insert(byte[] x) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    index 87a11c0..9f2d4c3 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    @@ -234,8 +234,8 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
                 */

                if (!haveSaveKey ||
    -                StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    -                    vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    +                StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    +                    vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    index 9f10ff1..5a5d54f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    @@ -229,8 +229,8 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
                 */

                if (!haveSaveKey ||
    -                StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    -                    vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    +                StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    +                    vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    index 9ff1141..e9ce739 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    @@ -230,8 +230,8 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
                 */

                if (!haveSaveKey ||
    -                StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    -                    vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    +                StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    +                    vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {

                  // New key.


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce258168/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    index 49efe1a..dfdd6d7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    @@ -290,8 +290,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
                   */

                  if (!haveSaveKey ||
    -                  StringExpr.compare(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    -                      vector[batchIndex], start[batchIndex], length[batchIndex]) != 0) {
    +                  StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
    +                      vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
                    // New key.

                    if (haveSaveKey) {
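
    The point of switching compare() != 0 to equal() in these hunks is that a three-way comparison has to establish ordering byte by byte, while an equality test can bail out as soon as the lengths differ and never needs a sign. A rough standalone sketch of the two shapes (illustrative only, not the actual StringExpr implementation, which is further optimized):

    // Illustrative only; Hive's StringExpr provides tuned versions of these.
    final class ByteSliceOps {

      // Three-way comparison: inspects bytes even when only equality is wanted.
      static int compare(byte[] a, int aStart, int aLen, byte[] b, int bStart, int bLen) {
        int n = Math.min(aLen, bLen);
        for (int i = 0; i < n; i++) {
          int d = (a[aStart + i] & 0xff) - (b[bStart + i] & 0xff);
          if (d != 0) {
            return d;
          }
        }
        return aLen - bLen;
      }

      // Equality-only check: a length mismatch is decided before touching any bytes.
      static boolean equal(byte[] a, int aStart, int aLen, byte[] b, int bStart, int bLen) {
        if (aLen != bLen) {
          return false;
        }
        for (int i = 0; i < aLen; i++) {
          if (a[aStart + i] != b[bStart + i]) {
            return false;
          }
        }
        return true;
      }

      public static void main(String[] args) {
        byte[] x = "join-key".getBytes();
        byte[] y = "join-key!".getBytes();
        System.out.println(compare(x, 0, x.length, y, 0, y.length) != 0);   // true
        System.out.println(equal(x, 0, x.length, y, 0, y.length));          // false, decided by length alone
      }
    }
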
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11472: ORC StringDirectTreeReader is thrashing the GC due to byte[] allocation per row (Gopal V, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb7153f9
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb7153f9
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb7153f9

    Branch: refs/heads/beeline-cli
    Commit: bb7153f9b1ee2d7e067341d252667edac593e15e
    Parents: 3e63fc4
    Author: Gopal V <gopalv@apache.org>
    Authored: Tue Aug 25 14:19:36 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Tue Aug 25 14:23:02 2015 -0700

    ----------------------------------------------------------------------
      .../hive/ql/io/orc/TreeReaderFactory.java | 18 ++++------
      .../apache/hadoop/hive/shims/Hadoop23Shims.java | 38 ++++++++++++++++++++
      .../apache/hadoop/hive/shims/HadoopShims.java | 22 ++++++++++++
      .../hadoop/hive/shims/HadoopShimsSecure.java | 32 +++++++++++++++++
      4 files changed, 99 insertions(+), 11 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
    index 9bfe268..6d47532 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
    @@ -47,6 +47,8 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
      import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
      import org.apache.hadoop.hive.serde2.io.ShortWritable;
      import org.apache.hadoop.hive.serde2.io.TimestampWritable;
    +import org.apache.hadoop.hive.shims.ShimLoader;
    +import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim;
      import org.apache.hadoop.io.BooleanWritable;
      import org.apache.hadoop.io.BytesWritable;
      import org.apache.hadoop.io.FloatWritable;
    @@ -1486,6 +1488,7 @@ public class TreeReaderFactory {
         */
        protected static class StringDirectTreeReader extends TreeReader {
          protected InStream stream;
    +    protected TextReaderShim data;
          protected IntegerReader lengths;
          private final LongColumnVector scratchlcv;

    @@ -1500,6 +1503,7 @@ public class TreeReaderFactory {
            this.stream = data;
            if (length != null && encoding != null) {
              this.lengths = createIntegerReader(encoding, length, false, false);
    +        this.data = ShimLoader.getHadoopShims().getTextReaderShim(this.stream);
            }
          }

    @@ -1520,6 +1524,7 @@ public class TreeReaderFactory {
            StreamName name = new StreamName(columnId,
                OrcProto.Stream.Kind.DATA);
            stream = streams.get(name);
    +      data = ShimLoader.getHadoopShims().getTextReaderShim(this.stream);
            lengths = createIntegerReader(stripeFooter.getColumnsList().get(columnId).getKind(),
                streams.get(new StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
                false, false);
    @@ -1534,6 +1539,7 @@ public class TreeReaderFactory {
          public void seek(PositionProvider index) throws IOException {
            super.seek(index);
            stream.seek(index);
    +      // don't seek data stream
            lengths.seek(index);
          }

    @@ -1548,17 +1554,7 @@ public class TreeReaderFactory {
                result = (Text) previous;
              }
              int len = (int) lengths.next();
    -        int offset = 0;
    -        byte[] bytes = new byte[len];
    -        while (len > 0) {
    -          int written = stream.read(bytes, offset, len);
    -          if (written < 0) {
    -            throw new EOFException("Can't finish byte read from " + stream);
    -          }
    -          len -= written;
    -          offset += written;
    -        }
    -        result.set(bytes);
    +        data.read(result, len);
            }
            return result;
          }

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    ----------------------------------------------------------------------
    diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    index 29d0f13..3292cb3 100644
    --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    @@ -17,8 +17,10 @@
       */
      package org.apache.hadoop.hive.shims;

    +import java.io.DataInputStream;
      import java.io.FileNotFoundException;
      import java.io.IOException;
    +import java.io.InputStream;
      import java.lang.reflect.Method;
      import java.net.InetSocketAddress;
      import java.net.MalformedURLException;
    @@ -68,7 +70,9 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
      import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
      import org.apache.hadoop.hdfs.client.HdfsAdmin;
      import org.apache.hadoop.hdfs.protocol.EncryptionZone;
    +import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim;
      import org.apache.hadoop.io.LongWritable;
    +import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapred.ClusterStatus;
      import org.apache.hadoop.mapred.InputSplit;
      import org.apache.hadoop.mapred.JobConf;
    @@ -110,10 +114,12 @@ public class Hadoop23Shims extends HadoopShimsSecure {
        HadoopShims.MiniDFSShim cluster = null;
        final boolean zeroCopy;
        final boolean storagePolicy;
    +  final boolean fastread;

        public Hadoop23Shims() {
          boolean zcr = false;
          boolean storage = false;
    +    boolean fastread = false;
          try {
            Class.forName("org.apache.hadoop.fs.CacheFlag", false,
                ShimLoader.class.getClassLoader());
    @@ -130,8 +136,18 @@ public class Hadoop23Shims extends HadoopShimsSecure {
            } catch (ClassNotFoundException ce) {
            }
          }
    +
    +    if (storage) {
    +      for (Method m : Text.class.getMethods()) {
    +        if ("readWithKnownLength".equals(m.getName())) {
    +          fastread = true;
    +        }
    +      }
    +    }
    +
          this.storagePolicy = storage;
          this.zeroCopy = zcr;
    +    this.fastread = fastread;
        }

        @Override
    @@ -1409,4 +1425,26 @@ public class Hadoop23Shims extends HadoopShimsSecure {
        public long getFileId(FileSystem fs, String path) throws IOException {
          return ensureDfs(fs).getClient().getFileInfo(path).getFileId();
        }
    +
    +  private final class FastTextReaderShim implements TextReaderShim {
    +    private final DataInputStream din;
    +
    +    public FastTextReaderShim(InputStream in) {
    +      this.din = new DataInputStream(in);
    +    }
    +
    +    @Override
    +    public void read(Text txt, int len) throws IOException {
    +      txt.readWithKnownLength(din, len);
    +    }
    +  }
    +
    +  @Override
    +  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
    +    if (!fastread) {
    +      return super.getTextReaderShim(in);
    +    }
    +    return new FastTextReaderShim(in);
    +  }
    +
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    ----------------------------------------------------------------------
    diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    index 2b6f322..6e2dedb 100644
    --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    @@ -18,6 +18,7 @@
      package org.apache.hadoop.hive.shims;

      import java.io.IOException;
    +import java.io.InputStream;
      import java.net.InetSocketAddress;
      import java.net.MalformedURLException;
      import java.net.URI;
    @@ -49,6 +50,7 @@ import org.apache.hadoop.fs.permission.FsAction;
      import org.apache.hadoop.fs.permission.FsPermission;
      import org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue;
      import org.apache.hadoop.io.LongWritable;
    +import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapred.ClusterStatus;
      import org.apache.hadoop.mapred.JobConf;
      import org.apache.hadoop.mapred.JobProfile;
    @@ -746,4 +748,24 @@ public interface HadoopShims {
         * @return inode ID of the file.
         */
        long getFileId(FileSystem fs, String path) throws IOException;
    +
    +  /**
    +   * Read data into a Text object in the fastest way possible
    +   */
    +  public interface TextReaderShim {
    +    /**
    +     * @param txt
    +     * @param len
    +     * @return bytes read
    +     * @throws IOException
    +     */
    +    void read(Text txt, int size) throws IOException;
    +  }
    +
    +  /**
    +   * Wrap a TextReaderShim around an input stream. The reader shim will not
    +   * buffer any reads from the underlying stream and will only consume bytes
    +   * which are required for TextReaderShim.read() input.
    +   */
    +  public TextReaderShim getTextReaderShim(InputStream input) throws IOException;
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb7153f9/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    ----------------------------------------------------------------------
    diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    index 89d7798..c6b7c9d 100644
    --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.shims;

      import java.io.DataInput;
      import java.io.DataOutput;
    +import java.io.EOFException;
      import java.io.IOException;
    +import java.io.InputStream;
      import java.lang.reflect.Constructor;
      import java.net.URI;
      import java.security.AccessControlException;
    @@ -40,6 +42,7 @@ import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.PathFilter;
      import org.apache.hadoop.fs.permission.FsAction;
      import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
    +import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapred.ClusterStatus;
      import org.apache.hadoop.mapred.FileInputFormat;
      import org.apache.hadoop.mapred.InputSplit;
    @@ -392,4 +395,33 @@ public abstract class HadoopShimsSecure implements HadoopShims {

        @Override
        abstract public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException;
    +
    +  private final class BasicTextReaderShim implements TextReaderShim {
    +    private final InputStream in;
    +
    +    public BasicTextReaderShim(InputStream in) {
    +      this.in = in;
    +    }
    +
    +    @Override
    +    public void read(Text txt, int len) throws IOException {
    +      int offset = 0;
    +      byte[] bytes = new byte[len];
    +      while (len > 0) {
    +        int written = in.read(bytes, offset, len);
    +        if (written < 0) {
    +          throw new EOFException("Can't finish read from " + in + " read "
    +              + (offset) + " bytes out of " + bytes.length);
    +        }
    +        len -= written;
    +        offset += written;
    +      }
    +      txt.set(bytes);
    +    }
    +  }
    +
    +  @Override
    +  public TextReaderShim getTextReaderShim(InputStream in) throws IOException {
    +    return new BasicTextReaderShim(in);
    +  }
      }
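
    The GC win in this change comes from not allocating a fresh byte[] for every string row: where the Hadoop version provides Text.readWithKnownLength(), the shim reads the known length straight into the Text's reusable buffer, and it only falls back to the old allocate-and-copy loop otherwise. A standalone sketch of that allocation difference (hypothetical names, no Hadoop dependency):

    // Sketch of per-row allocation vs. a reused scratch buffer (not Hive/Hadoop code).
    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;

    final class ReuseBufferDemo {

      // Old shape: one garbage byte[] per value read.
      static byte[] readAllocating(DataInputStream in, int len) throws IOException {
        byte[] bytes = new byte[len];
        in.readFully(bytes, 0, len);
        return bytes;
      }

      // New shape: a scratch buffer that only grows, so a warm reader produces no per-row garbage.
      private byte[] scratch = new byte[16];

      byte[] readReusing(DataInputStream in, int len) throws IOException {
        if (scratch.length < len) {
          scratch = new byte[Math.max(len, scratch.length * 2)];
        }
        in.readFully(scratch, 0, len);
        return scratch;   // only the first 'len' bytes are meaningful; valid until the next call
      }

      public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream("abcdefghij".getBytes()));
        ReuseBufferDemo demo = new ReuseBufferDemo();
        System.out.println(new String(demo.readReusing(in, 5), 0, 5));   // abcde
      }
    }
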
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11669: OrcFileDump service should support directories (Prasanth Jayachandran reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e712da0
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e712da0
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e712da0

    Branch: refs/heads/beeline-cli
    Commit: 8e712da0d8464173e0977b61661bbd00960b08d8
    Parents: 2ef40ca
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Fri Aug 28 13:13:18 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Fri Aug 28 13:13:18 2015 -0500

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/io/orc/FileDump.java | 48 ++++++++++++++++++--
      1 file changed, 44 insertions(+), 4 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/8e712da0/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    index 4acb810..76ecb33 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    @@ -22,7 +22,7 @@ import java.io.OutputStreamWriter;
      import java.io.PrintStream;
      import java.text.DecimalFormat;
      import java.util.ArrayList;
    -import java.util.Arrays;
    +import java.util.Collection;
      import java.util.List;
      import java.util.Map;

    @@ -32,8 +32,10 @@ import org.apache.commons.cli.HelpFormatter;
      import org.apache.commons.cli.OptionBuilder;
      import org.apache.commons.cli.Options;
      import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileStatus;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.ql.io.AcidUtils;
      import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
      import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
      import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
    @@ -47,6 +49,9 @@ import org.apache.hadoop.io.LongWritable;
      import org.codehaus.jettison.json.JSONException;
      import org.codehaus.jettison.json.JSONWriter;

    +import com.google.common.base.Strings;
    +import com.google.common.collect.Lists;
    +
      /**
       * A tool for printing out the file structure of ORC files.
       */
    @@ -86,23 +91,55 @@ public final class FileDump {
            System.err.println("Error : ORC files are not specified");
            return;
          }
    +
    +    // if the specified path is directory, iterate through all files and print the file dump
    +    List<String> filesInPath = Lists.newArrayList();
    +    for (String filename : files) {
    +      Path path = new Path(filename);
    +      filesInPath.addAll(getAllFilesInPath(path, conf));
    +    }
    +
          if (dumpData) {
    -      printData(Arrays.asList(files), conf);
    +      printData(filesInPath, conf);
          } else {
            if (jsonFormat) {
              boolean prettyPrint = cli.hasOption('p');
    -        JsonFileDump.printJsonMetaData(Arrays.asList(files), conf, rowIndexCols, prettyPrint,
    +        JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint,
                  printTimeZone);
            } else {
    -        printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
    +        printMetaData(filesInPath, conf, rowIndexCols, printTimeZone);
    +      }
    +    }
    +  }
    +
    +  private static Collection<? extends String> getAllFilesInPath(final Path path,
    +      final Configuration conf) throws IOException {
    +    List<String> filesInPath = Lists.newArrayList();
    +    FileSystem fs = path.getFileSystem(conf);
    +    FileStatus fileStatus = fs.getFileStatus(path);
    +    if (fileStatus.isDir()) {
    +      FileStatus[] fileStatuses = fs.listStatus(path, AcidUtils.hiddenFileFilter);
    +      for (FileStatus fileInPath : fileStatuses) {
    +        if (fileInPath.isDir()) {
    +          filesInPath.addAll(getAllFilesInPath(fileInPath.getPath(), conf));
    +        } else {
    +          filesInPath.add(fileInPath.getPath().toString());
    +        }
            }
    +    } else {
    +      filesInPath.add(path.toString());
          }
    +
    +    return filesInPath;
    +  }

        private static void printData(List<String> files, Configuration conf) throws IOException,
            JSONException {
          for (String file : files) {
            printJsonData(conf, file);
    +      if (files.size() > 1) {
    +        System.out.println(Strings.repeat("=", 80) + "\n");
    +      }
          }
        }

    @@ -204,6 +241,9 @@ public final class FileDump {
            System.out.println("Padding length: " + paddedBytes + " bytes");
            System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
            rows.close();
    +      if (files.size() > 1) {
    +        System.out.println(Strings.repeat("=", 80) + "\n");
    +      }
          }
        }
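
    With this change each argument to the dump tool may be a directory, and every non-hidden file underneath it is dumped in turn (the committed code filters children with AcidUtils.hiddenFileFilter). A rough standalone equivalent of the recursive expansion, using java.nio instead of the Hadoop FileSystem API; the dot/underscore check below is only an approximation of that filter:

    // Standalone sketch of "expand each argument to all files under it" (java.nio).
    import java.io.IOException;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.ArrayList;
    import java.util.List;

    final class ListFilesDemo {

      static List<String> getAllFilesInPath(Path path) throws IOException {
        List<String> filesInPath = new ArrayList<>();
        if (Files.isDirectory(path)) {
          try (DirectoryStream<Path> children = Files.newDirectoryStream(path)) {
            for (Path child : children) {
              String name = child.getFileName().toString();
              if (name.startsWith(".") || name.startsWith("_")) {
                continue;               // rough stand-in for the hidden-file filter
              }
              filesInPath.addAll(getAllFilesInPath(child));   // recurse into subdirectories
            }
          }
        } else {
          filesInPath.add(path.toString());
        }
        return filesInPath;
      }

      public static void main(String[] args) throws IOException {
        for (String arg : args) {
          System.out.println(getAllFilesInPath(Paths.get(arg)));   // file or directory tree
        }
      }
    }
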
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

    Branch: refs/heads/beeline-cli
    Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
    Parents: b247cac
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 11:43:25 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 11:43:25 2015 -0500

    ----------------------------------------------------------------------
      .../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +++++++++++---------
      .../queries/clientnegative/load_orc_negative3.q | 6 ++++
      .../test/queries/clientpositive/load_orc_part.q | 4 +++
      .../clientnegative/load_orc_negative3.q.out | 25 +++++++++++++
      .../results/clientpositive/load_orc_part.q.out | 18 ++++++++++
      5 files changed, 75 insertions(+), 16 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    index 85fa9c9..9d2702f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
    @@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          return new URI(fromScheme, fromAuthority, path, null, null);
        }

    -  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
    +  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
            boolean isLocal) throws SemanticException {

    +    FileStatus[] srcs = null;
    +
          // local mode implies that scheme should be "file"
          // we can change this going forward
          if (isLocal && !fromURI.getScheme().equals("file")) {
    @@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          try {
    -      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
    +      srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
            if (srcs == null || srcs.length == 0) {
              throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
                  "No files matching path " + fromURI));
    @@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
                + "\"hive.metastore.warehouse.dir\" do not conflict.";
            throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
          }
    +
    +    return srcs;
        }

        @Override
    @@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }

          // make sure the arguments make sense
    -    applyConstraints(fromURI, toURI, fromTree, isLocal);
    +    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);

          // for managed tables, make sure the file formats match
          if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
    -      ensureFileFormatsMatch(ts, fromURI);
    +      ensureFileFormatsMatch(ts, files);
          }
          inputs.add(toReadEntity(new Path(fromURI)));
          Task<? extends Serializable> rTask = null;
    @@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          }
        }

    -  private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
    +  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
          final Class<? extends InputFormat> destInputFormat;
          try {
            if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
    @@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
          // Other file formats should do similar check to make sure file formats match
          // when doing LOAD DATA .. INTO TABLE
          if (OrcInputFormat.class.equals(destInputFormat)) {
    -      Path inputFilePath = new Path(fromURI);
    -      try {
    -        FileSystem fs = FileSystem.get(fromURI, conf);
    -        // just creating orc reader is going to do sanity checks to make sure its valid ORC file
    -        OrcFile.createReader(fs, inputFilePath);
    -      } catch (FileFormatException e) {
    -        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    -            " table is stored as ORC but the file being loaded is not a valid ORC file."));
    -      } catch (IOException e) {
    -        throw new SemanticException("Unable to load data to destination table." +
    -            " Error: " + e.getMessage());
    +      for (FileStatus fileStatus : fileStatuses) {
    +        try {
    +          Path filePath = fileStatus.getPath();
    +          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
    +          // just creating orc reader is going to do sanity checks to make sure its valid ORC file
    +          OrcFile.createReader(fs, filePath);
    +        } catch (FileFormatException e) {
    +          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
    +              " table is stored as ORC but the file being loaded is not a valid ORC file."));
    +        } catch (IOException e) {
    +          throw new SemanticException("Unable to load data to destination table." +
    +              " Error: " + e.getMessage());
    +        }
            }
          }
        }
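
    The substance of the Java change above is that format validation now runs over every file returned by matchFilesOrDir(), so loading a whole directory into an ORC table checks each file rather than trying to open the directory itself as an ORC file. As a loose standalone illustration (the real check constructs an OrcFile reader; here each file is only probed for the 3-byte "ORC" magic):

    // Illustrative sketch only: validate every regular file under a load path.
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.List;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    final class LoadFormatCheckDemo {

      static boolean looksLikeOrc(Path file) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(file.toFile(), "r")) {
          byte[] magic = new byte[3];
          return raf.read(magic) == 3 && magic[0] == 'O' && magic[1] == 'R' && magic[2] == 'C';
        }
      }

      // Fails on the first non-ORC file, mirroring the SemanticException in the patch.
      static void ensureAllOrc(Path loadPath) throws IOException {
        List<Path> files;
        try (Stream<Path> walk = Files.walk(loadPath)) {
          files = walk.filter(Files::isRegularFile).collect(Collectors.toList());
        }
        for (Path p : files) {
          if (!looksLikeOrc(p)) {
            throw new IllegalArgumentException("Destination table is stored as ORC but "
                + p + " is not a valid ORC file.");
          }
        }
      }

      public static void main(String[] args) throws IOException {
        ensureAllOrc(Paths.get(args[0]));
        System.out.println("All files under " + args[0] + " look like ORC files.");
      }
    }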

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    new file mode 100644
    index 0000000..9a4116e
    --- /dev/null
    +++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
    @@ -0,0 +1,6 @@
    +create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data local inpath '../../data/files/kv1.txt' into table text_test;
    +
    +set hive.default.fileformat=ORC;
    +create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
    index 0927ea4..2ff884d 100644
    --- a/ql/src/test/queries/clientpositive/load_orc_part.q
    +++ b/ql/src/test/queries/clientpositive/load_orc_part.q
    @@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
      load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
      dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;

    +load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
    +load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
    +
      alter table orc_test add partition(ds='11');
      alter table orc_test partition(ds='11') set fileformat textfile;
      load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    new file mode 100644
    index 0000000..77fb50e
    --- /dev/null
    +++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
    @@ -0,0 +1,25 @@
    +PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@text_test
    +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@text_test
    +PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orc_test
    +POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orc_test
    +FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

    http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
    index 34ca493..2e02c2e 100644
    --- a/ql/src/test/results/clientpositive/load_orc_part.q.out
    +++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
    @@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
      POSTHOOK: Output: default@orc_test@ds=10
      Found 2 items
      #### A masked pattern was here ####
    +PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_staging
    +POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_staging
    +#### A masked pattern was here ####
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@orc_test@ds=10
    +#### A masked pattern was here ####
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@orc_test@ds=10
    +Found 1 items
    +#### A masked pattern was here ####
      PREHOOK: query: alter table orc_test add partition(ds='11')
      PREHOOK: type: ALTERTABLE_ADDPARTS
      PREHOOK: Output: default@orc_test
  • Xuf at Sep 9, 2015 at 7:08 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
    new file mode 100644
    index 0000000..55edd90
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
    @@ -0,0 +1,1647 @@
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-09
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@pcr_t1@ds=2000-04-10
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: create table pcr_t2 (ds string, key int, value string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Input: default@pcr_t1@ds=2000-04-08
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
    +POSTHOOK: Output: default@pcr_t2
    +POSTHOOK: Lineage: pcr_t2.ds SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:ds, type:string, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ]
    +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ]
    +PREHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + key
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + value
    + TOK_SELEXPR
    + TOK_TABLE_OR_COL
    + ds
    + TOK_WHERE
    + or
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-08'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 1
    + and
    + =
    + TOK_TABLE_OR_COL
    + ds
    + '2000-04-09'
    + =
    + TOK_TABLE_OR_COL
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + key
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + value
    + TOK_TABSORTCOLNAMEASC
    + TOK_TABLE_OR_COL
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: pcr_t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: int), value (type: string), ds (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [pcr_t1]
    + /pcr_t1/ds=2000-04-09 [pcr_t1]
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2
    + columns.types int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1, t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-08' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t2
    + and
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: int)
    + sort order: +
    + Map-reduce partition columns: key (type: int)
    + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: int)
    + 1 key (type: int)
    + outputColumnNames: _col0, _col1, _col6, _col7
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string)
    + outputColumnNames: _col0, _col1, _col3, _col4
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col3,_col4
    + columns.types int,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), '2000-04-08' (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), '2000-04-09' (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:int:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((_col2) IN ('2000-04-08', '2000-04-09') and (struct(_col7,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean)
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
    + sort order: +++
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Shuffle Join JOIN[4][tables = [t1, t2]] in Stage 'Stage-1:MAPRED' is a cross product
    +PREHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_JOIN
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t1
    + t1
    + TOK_TABREF
    + TOK_TABNAME
    + pcr_t2
    + t2
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + TOK_ALLCOLREF
    + TOK_WHERE
    + or
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-08'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 1
    + and
    + =
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    + '2000-04-09'
    + =
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + 2
    + TOK_ORDERBY
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + key
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t1
    + value
    + TOK_TABSORTCOLNAMEASC
    + .
    + TOK_TABLE_OR_COL
    + t2
    + ds
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: key (type: int), value (type: string), ds (type: string)
    + auto parallelism: false
    + TableScan
    + alias: t2
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: ds (type: string), key (type: int), value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-08
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-08
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-09
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-09
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: ds=2000-04-10
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2000-04-10
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + numFiles 1
    + numRows 20
    + partition_columns ds
    + partition_columns.types string
    + rawDataSize 160
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 180
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments
    + columns.types int:string
    +#### A masked pattern was here ####
    + name default.pcr_t1
    + partition_columns ds
    + partition_columns.types string
    + serialization.ddl struct pcr_t1 { i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t1
    + name: default.pcr_t1
    +#### A masked pattern was here ####
    + Partition
    + base file name: pcr_t2
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns ds,key,value
    + columns.comments
    + columns.types string:int:string
    +#### A masked pattern was here ####
    + name default.pcr_t2
    + numFiles 1
    + numRows 1
    + rawDataSize 18
    + serialization.ddl struct pcr_t2 { string ds, i32 key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 19
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.pcr_t2
    + name: default.pcr_t2
    + Truncated Path -> Alias:
    + /pcr_t1/ds=2000-04-08 [t1]
    + /pcr_t1/ds=2000-04-09 [t1]
    + /pcr_t1/ds=2000-04-10 [t1]
    + /pcr_t2 [t2]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0
    + 1
    + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
    + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + GatherStats: false
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
    + sort order: +++
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + tag: -1
    + value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: -mr-10003
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int,string,string,string,int,string
    + escape.delim \
    + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Truncated Path -> Alias:
    +#### A masked pattern was here ####
    + Needs Tagging: false
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns _col0,_col1,_col2,_col3,_col4,_col5
    + columns.types int:string:string:string:int:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: drop table pcr_t1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t1
    +PREHOOK: Output: default@pcr_t1
    +POSTHOOK: query: drop table pcr_t1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t1
    +POSTHOOK: Output: default@pcr_t1
    +PREHOOK: query: drop table pcr_t2
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@pcr_t2
    +PREHOOK: Output: default@pcr_t2
    +POSTHOOK: query: drop table pcr_t2
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@pcr_t2
    +POSTHOOK: Output: default@pcr_t2
    +PREHOOK: query: drop table pcr_t3
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table pcr_t3
    +POSTHOOK: type: DROPTABLE
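
    Editor's note: the plans in these hunks exercise two equivalent spellings of the same
    point-lookup filter: an explicit OR of per-row conjunctions, and a single tuple-membership
    test such as (struct(_col0,_col6)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')).
    A minimal plain-Java illustration of why the two forms accept the same rows (hypothetical
    names, not Hive's actual UDF implementation):

      import java.util.Arrays;
      import java.util.HashSet;
      import java.util.List;
      import java.util.Set;

      // Hypothetical illustration: tuple-IN as one membership test over (key, ds),
      // equivalent to ((key = 1 AND ds = '2000-04-08') OR (key = 2 AND ds = '2000-04-09')).
      final class TupleInExample {
        static final Set<List<Object>> TARGETS = new HashSet<>(Arrays.asList(
            Arrays.<Object>asList(1, "2000-04-08"),
            Arrays.<Object>asList(2, "2000-04-09")));

        static boolean matches(int key, String ds) {
          // List equality compares element-wise, so this is one lookup per row.
          return TARGETS.contains(Arrays.<Object>asList(key, ds));
        }
      }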

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out
    index f536767..17248e4 100644
    --- a/ql/src/test/results/clientpositive/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out
    @@ -390,21 +390,21 @@ STAGE PLANS:
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
    index 5aa0df8..fb08f10 100644
    --- a/ql/src/test/results/clientpositive/spark/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
    @@ -2534,16 +2534,16 @@ STAGE PLANS:
                        GatherStats: false
                        Filter Operator
                          isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: int), value (type: string), ds (type: string)
                            outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                              sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                              tag: -1
                              auto parallelism: false
                  Path -> Alias:
    @@ -2648,13 +2648,13 @@ STAGE PLANS:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
                        GlobalTableId: 0
      #### A masked pattern was here ####
                        NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    index a6e6e38..52a847a 100644
    --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out
    @@ -405,21 +405,21 @@ STAGE PLANS:
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                            Filter Operator
    - predicate: (_col0) IN ('a', 'b') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            Filter Operator
    - predicate: (_col0) IN ('c', 'd') (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index e8a9786..9756b0c 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -2909,7 +2909,7 @@ Stage-0
            Select Operator [SEL_2]
               outputColumnNames:["_col0"]
               Filter Operator [FIL_4]
    - predicate:(c_int) IN (-6, 6) (type: boolean)
    + predicate:((c_int = -6) or (c_int = 6)) (type: boolean)
                  TableScan [TS_0]
                     alias:cbo_t1


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    index 54003c3..c2250e6 100644
    --- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
    @@ -45,7 +45,7 @@ STAGE PLANS:
              TableScan
                alias: alltypesorc
                Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
                  Select Operator
                    expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                    outputColumnNames: _col0, _col1, _col2

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/vectorized_case.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
    index 9e47014..73bf12d 100644
    --- a/ql/src/test/results/clientpositive/vectorized_case.q.out
    +++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
    @@ -46,19 +46,20 @@ STAGE PLANS:
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (csmallint) IN (418, 12205, 10583) (type: boolean)
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized

        Stage: Stage-0
          Fetch Operator
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11618: Correct the SARG api to reunify the PredicateLeaf.Type INTEGER and LONG (Owen O'Malley, reviewed by Sergio Pena)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97bf32a1
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97bf32a1
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97bf32a1

    Branch: refs/heads/beeline-cli
    Commit: 97bf32a12f754d83a362aaa4048a6612d299a386
    Parents: ed4517c
    Author: Sergio Pena <sergio.pena@cloudera.com>
    Authored: Fri Aug 28 17:59:15 2015 -0500
    Committer: Sergio Pena <sergio.pena@cloudera.com>
    Committed: Fri Aug 28 17:59:15 2015 -0500

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 -
      .../hive/ql/io/parquet/LeafFilterFactory.java | 14 +-
      .../read/ParquetFilterPredicateConverter.java | 35 +++--
      .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 3 -
      .../hive/ql/io/orc/TestInputOutputFormat.java | 4 +-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 10 +-
      .../hive/ql/io/orc/TestRecordReaderImpl.java | 42 +++---
      .../parquet/TestParquetRecordReaderWrapper.java | 50 +++++---
      .../read/TestParquetFilterPredicate.java | 6 +-
      .../ql/io/sarg/TestConvertAstToSearchArg.java | 128 +++++++++++--------
      .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 22 ++--
      .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 3 +-
      12 files changed, 181 insertions(+), 138 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    index 0d765b1..fcb3746 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    @@ -637,8 +637,6 @@ class RecordReaderImpl implements RecordReader {
                return ((BigDecimal) obj).doubleValue();
              }
              break;
    - case INTEGER:
    - // fall through
            case LONG:
              if (obj instanceof Number) {
                // widening conversion

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    index a1dbc1a..1ceea6e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    @@ -22,6 +22,8 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
      import org.apache.parquet.filter2.predicate.FilterApi;
      import org.apache.parquet.filter2.predicate.FilterPredicate;
      import org.apache.parquet.io.api.Binary;
    +import org.apache.parquet.schema.PrimitiveType;
    +import org.apache.parquet.schema.Type;

      import static org.apache.parquet.filter2.predicate.FilterApi.eq;
      import static org.apache.parquet.filter2.predicate.FilterApi.lt;
    @@ -146,12 +148,16 @@ public class LeafFilterFactory {
         * @param type FilterPredicateType
         * @return
         */
    - public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){
    + public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type,
    + Type parquetType){
          switch (type){
    - case INTEGER:
    - return new IntFilterPredicateLeafBuilder();
            case LONG:
    - return new LongFilterPredicateLeafBuilder();
    + if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
    + PrimitiveType.PrimitiveTypeName.INT32) {
    + return new IntFilterPredicateLeafBuilder();
    + } else {
    + return new LongFilterPredicateLeafBuilder();
    + }
            case FLOAT: // float and double
              return new DoubleFilterPredicateLeafBuilder();
            case STRING: // string, char, varchar
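
    A hedged aside, not part of the commit: the reason the builder choice now consults the Parquet type is that parquet-mr's FilterApi ties literal classes to column classes, so an INT32 column is compared against Integer values while an INT64 column takes Long values. A minimal sketch of that distinction, assuming only the stock FilterApi (names are illustrative):

      import org.apache.parquet.filter2.predicate.FilterApi;
      import org.apache.parquet.filter2.predicate.FilterPredicate;

      public class IntVsLongColumnSketch {
        public static void main(String[] args) {
          // An int32 Parquet column wants boxed Integer literals (the IntFilterPredicateLeafBuilder path)...
          FilterPredicate onInt32 = FilterApi.lt(FilterApi.intColumn("id"), 22);
          // ...while an int64 column wants boxed Long literals (the LongFilterPredicateLeafBuilder path).
          FilterPredicate onInt64 = FilterApi.lt(FilterApi.longColumn("id"), 22L);
          System.out.println(onInt32); // lt(id, 22)
          System.out.println(onInt64); // lt(id, 22)
        }
      }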

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
    index f170026..d1864ae 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
    @@ -37,14 +37,6 @@ public class ParquetFilterPredicateConverter {
        private static final Log LOG = LogFactory.getLog(ParquetFilterPredicateConverter.class);

        /**
    - * Translate the search argument to the filter predicate parquet uses
    - * @return translate the sarg into a filter predicate
    - */
    - public static FilterPredicate toFilterPredicate(SearchArgument sarg) {
    - return toFilterPredicate(sarg, null);
    - }
    -
    - /**
         * Translate the search argument to the filter predicate parquet uses. It includes
         * only the columns from the passed schema.
         * @return translate the sarg into a filter predicate
    @@ -58,18 +50,21 @@ public class ParquetFilterPredicateConverter {
            }
          }

    - return translate(sarg.getExpression(), sarg.getLeaves(), columns);
    + return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
        }

    - private static FilterPredicate translate(ExpressionTree root, List<PredicateLeaf> leaves, Set<String> columns) {
    + private static FilterPredicate translate(ExpressionTree root,
    + List<PredicateLeaf> leaves,
    + Set<String> columns,
    + MessageType schema) {
          FilterPredicate p = null;
          switch (root.getOperator()) {
            case OR:
              for(ExpressionTree child: root.getChildren()) {
                if (p == null) {
    - p = translate(child, leaves, columns);
    + p = translate(child, leaves, columns, schema);
                } else {
    - FilterPredicate right = translate(child, leaves, columns);
    + FilterPredicate right = translate(child, leaves, columns, schema);
                  // constant means no filter, ignore it when it is null
                  if(right != null){
                    p = FilterApi.or(p, right);
    @@ -80,9 +75,9 @@ public class ParquetFilterPredicateConverter {
            case AND:
              for(ExpressionTree child: root.getChildren()) {
                if (p == null) {
    - p = translate(child, leaves, columns);
    + p = translate(child, leaves, columns, schema);
                } else {
    - FilterPredicate right = translate(child, leaves, columns);
    + FilterPredicate right = translate(child, leaves, columns, schema);
                  // constant means no filter, ignore it when it is null
                  if(right != null){
                    p = FilterApi.and(p, right);
    @@ -91,7 +86,8 @@ public class ParquetFilterPredicateConverter {
              }
              return p;
            case NOT:
    - FilterPredicate op = translate(root.getChildren().get(0), leaves, columns);
    + FilterPredicate op = translate(root.getChildren().get(0), leaves,
    + columns, schema);
              if (op != null) {
                return FilterApi.not(op);
              } else {
    @@ -101,8 +97,9 @@ public class ParquetFilterPredicateConverter {
              PredicateLeaf leaf = leaves.get(root.getLeaf());

              // If columns is null, then we need to create the leaf
    - if (columns == null || columns.contains(leaf.getColumnName())) {
    - return buildFilterPredicateFromPredicateLeaf(leaf);
    + if (columns.contains(leaf.getColumnName())) {
    + Type parquetType = schema.getType(leaf.getColumnName());
    + return buildFilterPredicateFromPredicateLeaf(leaf, parquetType);
              } else {
                // Do not create predicate if the leaf is not on the passed schema.
                return null;
    @@ -116,12 +113,12 @@ public class ParquetFilterPredicateConverter {
        }

        private static FilterPredicate buildFilterPredicateFromPredicateLeaf
    - (PredicateLeaf leaf) {
    + (PredicateLeaf leaf, Type parquetType) {
          LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
          FilterPredicateLeafBuilder builder;
          try {
            builder = leafFilterFactory
    - .getLeafFilterBuilderByType(leaf.getType());
    + .getLeafFilterBuilderByType(leaf.getType(), parquetType);
            if (builder == null) {
              return null;
            }
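
    For callers of the converter, the visible change is that the single-argument toFilterPredicate(sarg) overload is gone and a Parquet MessageType schema must be supplied, while integer-valued predicates are now expressed as Type.LONG with boxed Long literals. A small sketch of the new call shape, mirroring the test updates below (column names and values are illustrative):

      import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
      import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
      import org.apache.parquet.filter2.predicate.FilterPredicate;
      import org.apache.parquet.schema.MessageType;
      import org.apache.parquet.schema.MessageTypeParser;

      public class ToFilterPredicateSketch {
        public static void main(String[] args) {
          // Integer columns are now described as Type.LONG with Long literals.
          SearchArgument sarg = SearchArgumentFactory.newBuilder()
              .startAnd()
                .lessThan("id", PredicateLeaf.Type.LONG, 15L)
              .end()
              .build();
          // The schema argument is now mandatory; leaves whose columns are missing
          // from it are dropped rather than turned into filters.
          MessageType schema = MessageTypeParser.parseMessageType(
              "message test { required int32 id; required binary first_name; }");
          FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          System.out.println(p); // expected to print lt(id, 15)
        }
      }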

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
    index 5c4b7ea..e034650 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
    @@ -88,7 +88,6 @@ public class ConvertAstToSearchArg {
              case BYTE:
              case SHORT:
              case INT:
    - return PredicateLeaf.Type.INTEGER;
              case LONG:
                return PredicateLeaf.Type.LONG;
              case CHAR:
    @@ -139,8 +138,6 @@ public class ConvertAstToSearchArg {
            return null;
          }
          switch (type) {
    - case INTEGER:
    - return ((Number) lit).intValue();
            case LONG:
              return ((Number) lit).longValue();
            case STRING:
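
    On the consumer side, the INT fall-through to LONG means leaf literals for byte, short and int columns now come back from getLiteral() as boxed Longs. A small sketch of that contract, using the builder API the tests below exercise (values are illustrative):

      import java.util.List;

      import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

      public class LeafLiteralWidening {
        public static void main(String[] args) {
          SearchArgument sarg = SearchArgumentFactory.newBuilder()
              .startAnd()
                .lessThanEquals("id", PredicateLeaf.Type.LONG, 12L)
              .end()
              .build();
          List<PredicateLeaf> leaves = sarg.getLeaves();
          // getLiteral() now yields 12L rather than 12; widening through Number stays
          // safe for every integer width that used to map to Type.INTEGER.
          long bound = ((Number) leaves.get(0).getLiteral()).longValue();
          System.out.println(bound); // 12
        }
      }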

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    index 547e799..ce86cd8 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    @@ -1844,7 +1844,7 @@ public class TestInputOutputFormat {
          types.add(builder.build());
          types.add(builder.build());
          SearchArgument isNull = SearchArgumentFactory.newBuilder()
    - .startAnd().isNull("cost", PredicateLeaf.Type.INTEGER).end().build();
    + .startAnd().isNull("cost", PredicateLeaf.Type.LONG).end().build();
          conf.set(ConvertAstToSearchArg.SARG_PUSHDOWN, toKryo(isNull));
          conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
              "url,cost");
    @@ -1889,7 +1889,7 @@ public class TestInputOutputFormat {
          SearchArgument sarg =
              SearchArgumentFactory.newBuilder()
                  .startAnd()
    - .lessThan("z", PredicateLeaf.Type.INTEGER, new Integer(0))
    + .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
                  .end()
                  .build();
          conf.set("sarg.pushdown", toKryo(sarg));

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    index 4480d22..0bb8401 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    @@ -1923,9 +1923,9 @@ public class TestOrcFile {
          SearchArgument sarg = SearchArgumentFactory.newBuilder()
              .startAnd()
                .startNot()
    - .lessThan("int1", PredicateLeaf.Type.INTEGER, 300000)
    + .lessThan("int1", PredicateLeaf.Type.LONG, 300000L)
                .end()
    - .lessThan("int1", PredicateLeaf.Type.INTEGER, 600000)
    + .lessThan("int1", PredicateLeaf.Type.LONG, 600000L)
              .end()
              .build();
          RecordReader rows = reader.rowsOptions(new Reader.Options()
    @@ -1946,7 +1946,7 @@ public class TestOrcFile {
          // look through the file with no rows selected
          sarg = SearchArgumentFactory.newBuilder()
              .startAnd()
    - .lessThan("int1", PredicateLeaf.Type.INTEGER, 0)
    + .lessThan("int1", PredicateLeaf.Type.LONG, 0L)
              .end()
              .build();
          rows = reader.rowsOptions(new Reader.Options()
    @@ -1959,9 +1959,9 @@ public class TestOrcFile {
          // select first 100 and last 100 rows
          sarg = SearchArgumentFactory.newBuilder()
              .startOr()
    - .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 100)
    + .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 100)
                .startNot()
    - .lessThan("int1", PredicateLeaf.Type.INTEGER, 300 * 3400)
    + .lessThan("int1", PredicateLeaf.Type.LONG, 300L * 3400)
                .end()
              .end()
              .build();

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    index 7957cb4..839bbc6 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    @@ -445,7 +445,7 @@ public class TestRecordReaderImpl {
        @Test
        public void testPredEvalWithStringStats() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
    - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 100, null);
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 100L, null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));

    @@ -550,7 +550,7 @@ public class TestRecordReaderImpl {
        @Test
        public void testPredEvalWithDecimalStats() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
    - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));

    @@ -590,7 +590,7 @@ public class TestRecordReaderImpl {
        @Test
        public void testPredEvalWithTimestampStats() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
    - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15, null);
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));

    @@ -637,8 +637,8 @@ public class TestRecordReaderImpl {
        @Test
        public void testEquals() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.INTEGER,
    - "x", 15, null);
    + (PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.LONG,
    + "x", 15L, null);
          assertEquals(TruthValue.NO_NULL,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
          assertEquals(TruthValue.YES_NO_NULL,
    @@ -656,8 +656,8 @@ public class TestRecordReaderImpl {
        @Test
        public void testNullSafeEquals() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER,
    - "x", 15, null);
    + (PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG,
    + "x", 15L, null);
          assertEquals(TruthValue.NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
          assertEquals(TruthValue.YES_NO,
    @@ -675,8 +675,8 @@ public class TestRecordReaderImpl {
        @Test
        public void testLessThan() throws Exception {
          PredicateLeaf lessThan = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.INTEGER,
    - "x", 15, null);
    + (PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.LONG,
    + "x", 15L, null);
          assertEquals(TruthValue.NO_NULL,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), lessThan, null));
          assertEquals(TruthValue.NO_NULL,
    @@ -692,8 +692,8 @@ public class TestRecordReaderImpl {
        @Test
        public void testLessThanEquals() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.INTEGER,
    - "x", 15, null);
    + (PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.LONG,
    + "x", 15L, null);
          assertEquals(TruthValue.NO_NULL,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
          assertEquals(TruthValue.YES_NO_NULL,
    @@ -709,10 +709,10 @@ public class TestRecordReaderImpl {
        @Test
        public void testIn() throws Exception {
          List<Object> args = new ArrayList<Object>();
    - args.add(10);
    - args.add(20);
    + args.add(10L);
    + args.add(20L);
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
    + (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
                  "x", null, args);
          assertEquals(TruthValue.YES_NULL,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 20L), pred, null));
    @@ -727,10 +727,10 @@ public class TestRecordReaderImpl {
        @Test
        public void testBetween() throws Exception {
          List<Object> args = new ArrayList<Object>();
    - args.add(10);
    - args.add(20);
    + args.add(10L);
    + args.add(20L);
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.INTEGER,
    + (PredicateLeaf.Operator.BETWEEN, PredicateLeaf.Type.LONG,
                  "x", null, args);
          assertEquals(TruthValue.NO_NULL,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(0L, 5L), pred, null));
    @@ -751,7 +751,7 @@ public class TestRecordReaderImpl {
        @Test
        public void testIsNull() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.INTEGER,
    + (PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.LONG,
                  "x", null, null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(20L, 30L), pred, null));
    @@ -1306,10 +1306,10 @@ public class TestRecordReaderImpl {
        @Test
        public void testIntInBloomFilter() throws Exception {
          List<Object> args = new ArrayList<Object>();
    - args.add(15);
    - args.add(19);
    + args.add(15L);
    + args.add(19L);
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf
    - (PredicateLeaf.Operator.IN, PredicateLeaf.Type.INTEGER,
    + (PredicateLeaf.Operator.IN, PredicateLeaf.Type.LONG,
                  "x", null, args);
          BloomFilterIO bf = new BloomFilterIO(10000);
          for (int i = 20; i < 1000; i++) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
    index f9ca528..e92b696 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
    @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
      import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    +import org.apache.parquet.schema.MessageType;
    +import org.apache.parquet.schema.MessageTypeParser;
      import org.junit.Test;

      import java.sql.Date;
    @@ -48,15 +50,19 @@ public class TestParquetRecordReaderWrapper {
           SearchArgument sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    - .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .isNull("x", PredicateLeaf.Type.LONG)
    + .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
              .end()
              .end()
              .build();

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema = MessageTypeParser.parseMessageType("message test {" +
    + " optional int32 x; required int32 y; required int32 z;" +
    + " optional binary a;}");
    + FilterPredicate p =
    + ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected =
            "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " +
              "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))";
    @@ -75,23 +81,27 @@ public class TestParquetRecordReaderWrapper {
                  .equals("z", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("1.0"))
                  .end()
                  .build();
    + MessageType schema = MessageTypeParser.parseMessageType("message test {" +
    + " required int32 x; required binary y; required binary z;}");
          assertEquals("lteq(y, Binary{\"hi \"})",
    - ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
    + ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());

          sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    + .isNull("x", PredicateLeaf.Type.LONG)
              .between("y", PredicateLeaf.Type.DECIMAL,
                  new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING,
                  new HiveVarchar("stinger", 100).toString())
              .end()
              .end()
              .build();
    -
    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + schema = MessageTypeParser.parseMessageType("message test {" +
    + " optional int32 x; required binary y; required int32 z;" +
    + " optional binary a;}");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected =
              "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
              "not(eq(a, Binary{\"stinger\"})))";
    @@ -110,23 +120,28 @@ public class TestParquetRecordReaderWrapper {
                      new HiveDecimalWritable("1.0"))
                  .end()
                  .build();
    + MessageType schema = MessageTypeParser.parseMessageType("message test {" +
    + " required int32 x; required binary y; required binary z;}");
          assertEquals("lteq(y, Binary{\"hi \"})",
    - ParquetFilterPredicateConverter.toFilterPredicate(sarg).toString());
    + ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());

          sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    + .isNull("x", PredicateLeaf.Type.LONG)
              .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
                  new HiveDecimalWritable("20.0"))
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING,
                  new HiveVarchar("stinger", 100).toString())
              .end()
              .end()
              .build();
    + schema = MessageTypeParser.parseMessageType("message test {" +
    + " optional int32 x; required binary y; required int32 z;" +
    + " optional binary a;}");

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
              "not(eq(a, Binary{\"stinger\"})))";
          assertEquals(expected, p.toString());
    @@ -137,16 +152,19 @@ public class TestParquetRecordReaderWrapper {
          SearchArgument sarg =
              SearchArgumentFactory.newBuilder()
                  .startAnd()
    - .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
    - .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
    + .lessThan("x", PredicateLeaf.Type.LONG, 22L)
    + .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
                  .lessThanEquals("y", PredicateLeaf.Type.STRING,
                      new HiveChar("hi", 10).toString())
                  .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
                  .equals("z1", PredicateLeaf.Type.FLOAT, new Double(0.22))
                  .end()
                  .build();
    + MessageType schema = MessageTypeParser.parseMessageType("message test {" +
    + " required int32 x; required int32 x1;" +
    + " required binary y; required float z; required float z1;}");

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = "and(and(and(and(lt(x, 22), lt(x1, 22))," +
              " lteq(y, Binary{\"hi \"})), eq(z, " +
              "0.22)), eq(z1, 0.22))";

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    index 847a02b..ac5c1a0 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    @@ -35,9 +35,9 @@ public class TestParquetFilterPredicate {
          SearchArgument sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("a", PredicateLeaf.Type.INTEGER)
    - .between("y", PredicateLeaf.Type.INTEGER, 10, 20) // Column will be removed from filter
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3) // Column will be removed from filter
    + .isNull("a", PredicateLeaf.Type.LONG)
    + .between("y", PredicateLeaf.Type.LONG, 10L, 20L) // Column will be removed from filter
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L) // Column will be removed from filter
              .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
              .end()
              .end()

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
    index 9e8425a..e72789d 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestConvertAstToSearchArg.java
    @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
      import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
      import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
    +import org.apache.parquet.schema.MessageType;
    +import org.apache.parquet.schema.MessageTypeParser;
      import org.junit.Test;

      import java.beans.XMLDecoder;
    @@ -550,7 +552,11 @@ public class TestConvertAstToSearchArg {
          List<PredicateLeaf> leaves = sarg.getLeaves();
          assertEquals(9, leaves.size());

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    +
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String[] conditions = new String[]{
            "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */
            "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */
    @@ -586,34 +592,34 @@ public class TestConvertAstToSearchArg {
          assertEquals("alan", leaf.getLiteral());

          leaf = leaves.get(3);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(12, leaf.getLiteral());
    + assertEquals(12L, leaf.getLiteral());

          leaf = leaves.get(4);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(13, leaf.getLiteral());
    + assertEquals(13L, leaf.getLiteral());

          leaf = leaves.get(5);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(15, leaf.getLiteral());
    + assertEquals(15L, leaf.getLiteral());

          leaf = leaves.get(6);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(16, leaf.getLiteral());
    + assertEquals(16L, leaf.getLiteral());

          leaf = leaves.get(7);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(30, leaf.getLiteral());
    + assertEquals(30L, leaf.getLiteral());

          leaf = leaves.get(8);
          assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    @@ -842,7 +848,10 @@ public class TestConvertAstToSearchArg {
            "lteq(id, 4)" /* id <= 4 */
          };

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions);
          assertEquals(expected, p.toString());

    @@ -860,16 +869,16 @@ public class TestConvertAstToSearchArg {
          assertEquals("sue", leaf.getLiteral());

          leaf = leaves.get(2);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(12, leaf.getLiteral());
    + assertEquals(12L, leaf.getLiteral());

          leaf = leaves.get(3);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(4, leaf.getLiteral());
    + assertEquals(4L, leaf.getLiteral());

          assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)",
              sarg.getExpression().toString());
    @@ -1271,18 +1280,21 @@ public class TestConvertAstToSearchArg {
            "eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */
            "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */
          };
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; required binary last_name;}");

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions);
          assertEquals(expected, p.toString());

          PredicateLeaf leaf = leaves.get(0);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
          assertEquals(null, leaf.getLiteral());
    - assertEquals(23, leaf.getLiteralList().get(0));
    - assertEquals(45, leaf.getLiteralList().get(1));
    + assertEquals(23L, leaf.getLiteralList().get(0));
    + assertEquals(45L, leaf.getLiteralList().get(1));

          leaf = leaves.get(1);
          assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    @@ -1493,15 +1505,19 @@ public class TestConvertAstToSearchArg {
            "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */
          };

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p =
    + ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions);
          assertEquals(expected, p.toString());

          PredicateLeaf leaf = leaves.get(0);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(12, leaf.getLiteral());
    + assertEquals(12L, leaf.getLiteral());

          leaf = leaves.get(1);
          assertEquals(PredicateLeaf.Type.STRING, leaf.getType());
    @@ -1511,11 +1527,11 @@ public class TestConvertAstToSearchArg {
          assertEquals("sue", leaf.getLiteralList().get(1));

          leaf = leaves.get(2);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(34, leaf.getLiteralList().get(0));
    - assertEquals(50, leaf.getLiteralList().get(1));
    + assertEquals(34L, leaf.getLiteralList().get(0));
    + assertEquals(50L, leaf.getLiteralList().get(1));

          assertEquals("(and (not leaf-0) leaf-1 leaf-2)",
              sarg.getExpression().toString());
    @@ -1752,7 +1768,10 @@ public class TestConvertAstToSearchArg {
          List<PredicateLeaf> leaves = sarg.getLeaves();
          assertEquals(1, leaves.size());

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected =
            "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))";
          assertEquals(p.toString(), expected);
    @@ -2232,7 +2251,10 @@ public class TestConvertAstToSearchArg {
          List<PredicateLeaf> leaves = sarg.getLeaves();
          assertEquals(9, leaves.size());

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" +
            "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " +
            "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " +
    @@ -2255,58 +2277,58 @@ public class TestConvertAstToSearchArg {
          assertEquals(p.toString(), expected);

          PredicateLeaf leaf = leaves.get(0);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(18, leaf.getLiteral());
    + assertEquals(18L, leaf.getLiteral());

          leaf = leaves.get(1);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(10, leaf.getLiteral());
    + assertEquals(10L, leaf.getLiteral());

          leaf = leaves.get(2);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(13, leaf.getLiteral());
    + assertEquals(13L, leaf.getLiteral());

          leaf = leaves.get(3);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(16, leaf.getLiteral());
    + assertEquals(16L, leaf.getLiteral());

          leaf = leaves.get(4);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(11, leaf.getLiteral());
    + assertEquals(11L, leaf.getLiteral());

          leaf = leaves.get(5);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(12, leaf.getLiteral());
    + assertEquals(12L, leaf.getLiteral());

          leaf = leaves.get(6);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(14, leaf.getLiteral());
    + assertEquals(14L, leaf.getLiteral());

          leaf = leaves.get(7);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(15, leaf.getLiteral());
    + assertEquals(15L, leaf.getLiteral());

          leaf = leaves.get(8);
    - assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaf.getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator());
          assertEquals("id", leaf.getColumnName());
    - assertEquals(17, leaf.getLiteral());
    + assertEquals(17L, leaf.getLiteral());

          assertEquals("(and" +
              " (or leaf-0 leaf-1 leaf-2 leaf-3)" +
    @@ -2388,7 +2410,10 @@ public class TestConvertAstToSearchArg {
          List<PredicateLeaf> leaves = sarg.getLeaves();
          assertEquals(0, leaves.size());

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          assertNull(p);

          assertEquals("YES_NO_NULL",
    @@ -2643,15 +2668,18 @@ public class TestConvertAstToSearchArg {
          List<PredicateLeaf> leaves = sarg.getLeaves();
          assertEquals(1, leaves.size());

    - FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg);
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required int32 id;" +
    + " required binary first_name; }");
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
          String expected = "and(not(lt(id, 10)), not(lt(id, 10)))";
          assertEquals(expected, p.toString());

    - assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType());
    + assertEquals(PredicateLeaf.Type.LONG, leaves.get(0).getType());
          assertEquals(PredicateLeaf.Operator.LESS_THAN,
              leaves.get(0).getOperator());
          assertEquals("id", leaves.get(0).getColumnName());
    - assertEquals(10, leaves.get(0).getLiteral());
    + assertEquals(10L, leaves.get(0).getLiteral());

          assertEquals("(and (not leaf-0) (not leaf-0))",
              sarg.getExpression().toString());

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
    index 20de846..573d5c6 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
    @@ -330,7 +330,7 @@ public class TestSearchArgumentImpl {
          SearchArgument sarg =
              SearchArgumentFactory.newBuilder()
                  .startAnd()
    - .lessThan("x", PredicateLeaf.Type.INTEGER, 10)
    + .lessThan("x", PredicateLeaf.Type.LONG, 10L)
                  .lessThanEquals("y", PredicateLeaf.Type.STRING, "hi")
                  .equals("z", PredicateLeaf.Type.FLOAT, 1.0)
                  .end()
    @@ -342,9 +342,9 @@ public class TestSearchArgumentImpl {
          sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    - .between("y", PredicateLeaf.Type.INTEGER, 10, 20)
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .isNull("x", PredicateLeaf.Type.LONG)
    + .between("y", PredicateLeaf.Type.LONG, 10L, 20L)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING, "stinger")
              .end()
              .end()
    @@ -376,10 +376,10 @@ public class TestSearchArgumentImpl {
          sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    + .isNull("x", PredicateLeaf.Type.LONG)
              .between("y", PredicateLeaf.Type.DECIMAL,
                  new HiveDecimalWritable("10"), new HiveDecimalWritable("20.0"))
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING,
                  new HiveVarchar("stinger", 100).toString())
              .end()
    @@ -413,10 +413,10 @@ public class TestSearchArgumentImpl {
          sarg = SearchArgumentFactory.newBuilder()
              .startNot()
              .startOr()
    - .isNull("x", PredicateLeaf.Type.INTEGER)
    + .isNull("x", PredicateLeaf.Type.LONG)
              .between("y", PredicateLeaf.Type.DECIMAL, new HiveDecimalWritable("10"),
                  new HiveDecimalWritable("20.0"))
    - .in("z", PredicateLeaf.Type.INTEGER, 1, 2, 3)
    + .in("z", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
              .nullSafeEquals("a", PredicateLeaf.Type.STRING,
                  new HiveVarchar("stinger", 100).toString())
              .end()
    @@ -435,8 +435,8 @@ public class TestSearchArgumentImpl {
          SearchArgument sarg =
              SearchArgumentFactory.newBuilder()
                  .startAnd()
    - .lessThan("x", PredicateLeaf.Type.INTEGER, new Integer((short) 22))
    - .lessThan("x1", PredicateLeaf.Type.INTEGER, new Integer(22))
    + .lessThan("x", PredicateLeaf.Type.LONG, 22L)
    + .lessThan("x1", PredicateLeaf.Type.LONG, 22L)
                  .lessThanEquals("y", PredicateLeaf.Type.STRING,
                      new HiveChar("hi", 10).toString())
                  .equals("z", PredicateLeaf.Type.FLOAT, new Double(0.22))
    @@ -480,7 +480,7 @@ public class TestSearchArgumentImpl {
          SearchArgument sarg =
              SearchArgumentFactory.newBuilder()
              .startAnd()
    - .lessThan("x", PredicateLeaf.Type.INTEGER, "hi")
    + .lessThan("x", PredicateLeaf.Type.LONG, "hi")
              .end()
              .build();
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/97bf32a1/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    ----------------------------------------------------------------------
    diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    index 3a92565..dc71db4 100644
    --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    @@ -47,8 +47,7 @@ public interface PredicateLeaf {
         * The possible types for sargs.
         */
        public static enum Type {
    - INTEGER(Integer.class), // all of the integer types except long
    - LONG(Long.class),
    + LONG(Long.class), // all of the integer types
          FLOAT(Double.class), // float and double
          STRING(String.class), // string, char, varchar
          DATE(Date.class),
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11664: Make tez container logs work with new log4j2 changes (Prasanth Jayachandran reviewed by Gopal V)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/607b0e8a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/607b0e8a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/607b0e8a

    Branch: refs/heads/beeline-cli
    Commit: 607b0e8a6b4da164606b87c4d012059276b3a994
    Parents: 9e85bbf
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Aug 27 17:14:17 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Aug 27 17:14:17 2015 -0500

    ----------------------------------------------------------------------
      data/conf/tez/hive-site.xml | 4 ++
      ql/src/main/resources/tez-container-log4j2.xml | 49 +++++++++++++++++++++
      2 files changed, 53 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/data/conf/tez/hive-site.xml
    ----------------------------------------------------------------------
    diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
    index e0238aa..2f9415a 100644
    --- a/data/conf/tez/hive-site.xml
    +++ b/data/conf/tez/hive-site.xml
    @@ -253,5 +253,9 @@
        </description>
      </property>

    +<property>
    + <name>hive.tez.java.opts</name>
    + <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
    +</property>

      </configuration>

    http://git-wip-us.apache.org/repos/asf/hive/blob/607b0e8a/ql/src/main/resources/tez-container-log4j2.xml
    ----------------------------------------------------------------------
    diff --git a/ql/src/main/resources/tez-container-log4j2.xml b/ql/src/main/resources/tez-container-log4j2.xml
    new file mode 100644
    index 0000000..be949dc
    --- /dev/null
    +++ b/ql/src/main/resources/tez-container-log4j2.xml
    @@ -0,0 +1,49 @@
    +<?xml version="1.0" encoding="UTF-8"?>
    +<!--
    + Licensed to the Apache Software Foundation (ASF) under one or more
    + contributor license agreements. See the NOTICE file distributed with
    + this work for additional information regarding copyright ownership.
    + The ASF licenses this file to You under the Apache License, Version 2.0
    + (the "License"); you may not use this file except in compliance with
    + the License. You may obtain a copy of the License at
    +
    + http://www.apache.org/licenses/LICENSE-2.0
    +
    + Unless required by applicable law or agreed to in writing, software
    + distributed under the License is distributed on an "AS IS" BASIS,
    + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + See the License for the specific language governing permissions and
    + limitations under the License.
    +-->
    +
    +<Configuration status="trace" strict="true" name="TezContainerLog4j2"
    + packages="org.apache.hadoop.hive.ql.log">
    +
    + <Properties>
    + <Property name="tez.container.log.threshold">ALL</Property>
    + <Property name="tez.container.log.level">INFO</Property>
    + <Property name="tez.container.root.logger">CLA</Property>
    + <Property name="tez.container.log.dir">${sys:yarn.app.container.log.dir}</Property>
    + <Property name="tez.container.log.file">syslog</Property>
    + </Properties>
    +
    + <Appenders>
    + <RollingFile name="CLA" fileName="${sys:tez.container.log.dir}/${sys:tez.container.log.file}"
    + filePattern="${sys:tez.container.log.dir}/${sys:tez.container.log.file}.%d{yyyy-MM-dd}">
    + <PatternLayout pattern="%d{ISO8601} %p [%t] %c{2}: %m%n" />
    + <Policies>
    + <!-- rollover@midnight (interval = 1 means daily) -->
    + <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
    + </Policies>
    + <!-- 30-day backup -->
    + <!-- <DefaultRolloverStrategy max="30"/> -->
    + </RollingFile>
    + </Appenders>
    +
    + <Loggers>
    + <Root level="${sys:tez.container.log.threshold}">
    + <AppenderRef ref="${sys:tez.container.root.logger}" level="${sys:tez.container.log.level}"/>
    + </Root>
    + </Loggers>
    +
    +</Configuration>
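
    A hedged illustration, not part of the commit: the ${sys:...} lookups in the config above resolve against JVM system properties, which the hive.tez.java.opts value added to data/conf/tez/hive-site.xml supplies as -D flags, while YARN provides yarn.app.container.log.dir per container. A container-side check might look like:

      public class TezContainerLogProps {
        public static void main(String[] args) {
          // Print the properties the tez-container-log4j2.xml lookups depend on.
          String[] keys = {
              "log4j.configurationFile",    // set via -D in hive.tez.java.opts
              "tez.container.log.level",    // defaults to INFO in the config
              "tez.container.root.logger",  // defaults to the CLA appender
              "yarn.app.container.log.dir"  // supplied by the YARN NodeManager
          };
          for (String key : keys) {
            System.out.println(key + " = " + System.getProperty(key, "<unset>"));
          }
        }
      }
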
  • Xuf at Sep 9, 2015 at 7:08 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
    new file mode 100644
    index 0000000..745237d
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out
    @@ -0,0 +1,292 @@
    +PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@newtypestbl
    +POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@newtypestbl
    +PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@newtypestbl
    +POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2) uniontbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@newtypestbl
    +POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.ts EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
    +PREHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
    +select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
    +select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts=cast('2011-01-01 01:01:01' as varchar(20))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1033237945500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts!=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1033237945500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-20 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2011-01-22 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<cast('2010-10-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts<=cast('2011-01-20 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1033237945500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1033237945500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-01 01:01:01' as timestamp), cast('2011-01-20 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts in (cast('2011-01-02 01:01:01' as timestamp), cast('2011-01-08 01:01:01' as timestamp))
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-08 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +445653015500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2011-01-25 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +1478890961000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where ts between cast('2010-10-01 01:01:01' as timestamp) and cast('2010-11-01 01:01:01' as timestamp)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL

    http://git-wip-us.apache.org/repos/asf/hive/blob/f2056a13/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
    new file mode 100644
    index 0000000..23e3cd0
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out
    @@ -0,0 +1,220 @@
    +PREHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@newtypestbl
    +POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@newtypestbl
    +PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@newtypestbl
    +POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2) uniontbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@newtypestbl
    +POSTHOOK: Lineage: newtypestbl.c EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.d EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.da EXPRESSION []
    +POSTHOOK: Lineage: newtypestbl.v EXPRESSION []
    +PREHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
    +select sum(hash(*)) from newtypestbl where v="bee"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- varchar data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests)
    +select sum(hash(*)) from newtypestbl where v="bee"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +334427804500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v!="bee"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +334427804500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<"world"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v<="world"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v="bee "
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "orange")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("bee", "world")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v in ("orange")
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "orange"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +-252951929000
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "bee" and "zombie"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +81475875500
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
    +PREHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select sum(hash(*)) from newtypestbl where v between "orange" and "pine"
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@newtypestbl
    +#### A masked pattern was here ####
    +NULL
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11638: ExprNodeDesc hashMap accidentally degrades into O(N) instead of O(1) (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/037fb02a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/037fb02a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/037fb02a

    Branch: refs/heads/beeline-cli
    Commit: 037fb02a8edc1266d0beb02eee1fb90737fc1ef7
    Parents: bb7153f
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 02:27:35 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 02:27:35 2015 +0200

    ----------------------------------------------------------------------
      .../java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java | 4 ++--
      .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java | 8 ++++++++
      .../java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java | 9 +++++----
      3 files changed, 15 insertions(+), 6 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    index 366b74b..6a81170 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LeadLagInfo.java
    @@ -19,7 +19,7 @@
      package org.apache.hadoop.hive.ql.parse;

      import java.util.ArrayList;
    -import java.util.HashMap;
    +import java.util.IdentityHashMap;
      import java.util.List;
      import java.util.Map;

    @@ -53,7 +53,7 @@ public class LeadLagInfo {
        public void addLLFuncExprForTopExpr(ExprNodeDesc topExpr, ExprNodeGenericFuncDesc llFuncExpr) {
          addLeadLagExpr(llFuncExpr);
          mapTopExprToLLFunExprs = mapTopExprToLLFunExprs == null ?
    - new HashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
    + new IdentityHashMap<ExprNodeDesc, List<ExprNodeGenericFuncDesc>>() : mapTopExprToLLFunExprs;
          List<ExprNodeGenericFuncDesc> funcList = mapTopExprToLLFunExprs.get(topExpr);
          if (funcList == null) {
            funcList = new ArrayList<ExprNodeGenericFuncDesc>();

    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    index 15267b9..328bd86 100755
    --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
    @@ -60,6 +60,14 @@ public abstract class ExprNodeDesc implements Serializable, Node {
          return typeInfo.hashCode();
        }

    + @Override
    + public final boolean equals(Object o) {
    + // prevent equals from being overridden in sub-classes
    + // always use ExprNodeDescEqualityWrapper
    + // if you need any other equality than Object.equals()
    + return (o == this);
    + }
    +
        public TypeInfo getTypeInfo() {
          return typeInfo;
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/037fb02a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    index 33ad3e8..f23facf 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.ppd;

      import java.util.ArrayList;
      import java.util.HashMap;
    +import java.util.IdentityHashMap;
      import java.util.List;
      import java.util.Map;
      import java.util.Map.Entry;
    @@ -98,17 +99,17 @@ public class ExprWalkerInfo implements NodeProcessorCtx {
        public ExprWalkerInfo() {
          pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
          nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
    - exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
    - newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
    + exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
    + newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
        }

        public ExprWalkerInfo(Operator<? extends OperatorDesc> op) {
          this.op = op;

          pushdownPreds = new HashMap<String, List<ExprNodeDesc>>();
    - exprInfoMap = new HashMap<ExprNodeDesc, ExprInfo>();
    + exprInfoMap = new IdentityHashMap<ExprNodeDesc, ExprInfo>();
          nonFinalPreds = new HashMap<String, List<ExprNodeDesc>>();
    - newToOldExprMap = new HashMap<ExprNodeDesc, ExprNodeDesc>();
    + newToOldExprMap = new IdentityHashMap<ExprNodeDesc, ExprNodeDesc>();
        }

        /**
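
    The pattern above is worth spelling out: a HashMap keyed on ExprNodeDesc pays the full recursive hashCode()/equals() walk of the expression tree on every probe, which is where the accidental O(N) lookup cost comes from, while IdentityHashMap hashes the key reference itself. Below is a minimal, self-contained sketch of that difference, using a hypothetical Expr stand-in class rather than the Hive ExprNodeDesc API:

        import java.util.HashMap;
        import java.util.IdentityHashMap;
        import java.util.Map;

        // Hypothetical stand-in for a deep expression tree; not the Hive type.
        class Expr {
            static long hashCalls = 0;      // counts recursive hashCode() invocations
            final Expr child;
            Expr(Expr child) { this.child = child; }

            @Override
            public int hashCode() {
                hashCalls++;
                // recursive hashCode: cost grows with the depth of the chain
                return 31 * (child == null ? 0 : child.hashCode()) + 1;
            }
        }

        public class IdentityMapSketch {
            public static void main(String[] args) {
                // right-leaning chain of 1000 nodes
                Expr chain = null;
                for (int i = 0; i < 1000; i++) {
                    chain = new Expr(chain);
                }

                Map<Expr, String> byValue = new HashMap<>();
                byValue.put(chain, "pred");
                Expr.hashCalls = 0;
                byValue.get(chain);
                System.out.println("HashMap probe hashCode() calls:         " + Expr.hashCalls);  // 1000

                Map<Expr, String> byIdentity = new IdentityHashMap<>();
                byIdentity.put(chain, "pred");
                Expr.hashCalls = 0;
                byIdentity.get(chain);
                System.out.println("IdentityHashMap probe hashCode() calls: " + Expr.hashCalls);  // 0
            }
        }

    This is only an illustration of why the maps in LeadLagInfo and ExprWalkerInfo were switched to identity semantics; the commit itself additionally makes ExprNodeDesc.equals() final and reference-based so that sub-classes cannot reintroduce deep equality by accident (ExprNodeDescEqualityWrapper remains the way to get value equality).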
  • Xuf at Sep 9, 2015 at 7:08 am
    "Alter index rebuild" statements submitted through HiveServer2 fail when Sentry is enabled (Aihua Xu, reviewed by Chao Sun and Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcf21cd6
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcf21cd6
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcf21cd6

    Branch: refs/heads/beeline-cli
    Commit: dcf21cd6fa98fb5db01ef661bb3b9f94d9ca2d15
    Parents: af91308
    Author: Aihua Xu <aihuaxu@gmail.com>
    Authored: Sat Aug 29 12:57:52 2015 -0700
    Committer: Chao Sun <sunchao@apache.org>
    Committed: Sat Aug 29 12:57:52 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/optimizer/IndexUtils.java | 13 +++++++++----
      1 file changed, 9 insertions(+), 4 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/dcf21cd6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
    index 92cae67..0b30258 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
    @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereProcessor;
      import org.apache.hadoop.hive.ql.parse.ParseContext;
      import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
      import org.apache.hadoop.hive.ql.parse.SemanticException;
    +import org.apache.hadoop.hive.ql.session.SessionState;

      /**
       * Utility class for index support.
    @@ -213,13 +214,17 @@ public final class IndexUtils {
          return hive.getIndexes(table.getTTable().getDbName(), table.getTTable().getTableName(), max);
        }

    - public static Task<?> createRootTask(HiveConf builderConf, Set<ReadEntity> inputs,
    - Set<WriteEntity> outputs, StringBuilder command,
    + public static Task<?> createRootTask(
    + HiveConf builderConf,
    + Set<ReadEntity> inputs,
    + Set<WriteEntity> outputs,
    + StringBuilder command,
            LinkedHashMap<String, String> partSpec,
    - String indexTableName, String dbName){
    + String indexTableName,
    + String dbName){
          // Don't try to index optimize the query to build the index
          HiveConf.setBoolVar(builderConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER, false);
    - Driver driver = new Driver(builderConf);
    + Driver driver = new Driver(builderConf, SessionState.get().getUserName());
          driver.compile(command.toString(), false);

          Task<?> rootTask = driver.getPlan().getRootTasks().get(0);
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11700: exception in logs in Tez test with new logger (Prasanth Jayachandran reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1fc9320f
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1fc9320f
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1fc9320f

    Branch: refs/heads/beeline-cli
    Commit: 1fc9320f07b066e4850a04958a2c73643b5ad5b1
    Parents: 5a1957f
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Wed Sep 2 15:56:15 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Wed Sep 2 15:56:15 2015 -0500

    ----------------------------------------------------------------------
      data/conf/hive-log4j2.xml | 3 ---
      1 file changed, 3 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/1fc9320f/data/conf/hive-log4j2.xml
    ----------------------------------------------------------------------
    diff --git a/data/conf/hive-log4j2.xml b/data/conf/hive-log4j2.xml
    index 11c8e79..452f01f 100644
    --- a/data/conf/hive-log4j2.xml
    +++ b/data/conf/hive-log4j2.xml
    @@ -94,9 +94,6 @@
          <Logger name="org.apache.zookeeper.ClientCnxnSocketNIO" level="WARN">
            <AppenderRef ref="${sys:hive.root.logger}"/>
          </Logger>
    - <Logger name="org.apache.hadoop.hive.ql.log.PerfLogger" level="${sys:hive.ql.log.PerfLogger.level}">
    - <AppenderRef ref="${sys:hive.ql.log.PerfLogger.logger}"/>
    - </Logger>
          <Logger name="org.apache.hadoop.hive.ql.exec.Operator" level="INFO">
            <AppenderRef ref="${sys:hive.root.logger}"/>
          </Logger>
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-10175: DynamicPartitionPruning lacks a fast-path exit for large IN() queries (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b6d1143a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b6d1143a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b6d1143a

    Branch: refs/heads/beeline-cli
    Commit: b6d1143aa7aaa20de035898f34df2d6b581895b6
    Parents: d147a79
    Author: Gopal V <gopalv@apache.org>
    Authored: Fri Aug 28 01:22:45 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Fri Aug 28 01:22:45 2015 -0700

    ----------------------------------------------------------------------
      .../optimizer/DynamicPartitionPruningOptimization.java | 12 ++++++++++++
      1 file changed, 12 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b6d1143a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    index f475926..5ebd28a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
    @@ -189,6 +189,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
            LOG.debug("TableScan: " + ts);
          }

    + if (ts == null) {
    + // could be a reduce sink
    + LOG.warn("Could not find the table scan for " + filter);
    + return null;
    + } else {
    + Table table = ts.getConf().getTableMetadata();
    + if (table != null && !table.isPartitioned()) {
    + // table is not partitioned, skip optimizer
    + return null;
    + }
    + }
    +
          // collect the dynamic pruning conditions
          removerContext.dynLists.clear();
          walkExprTree(desc.getPredicate(), removerContext);
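
    The added guard is a plain fast-path exit: if the filter has no backing table scan, or the scanned table is not partitioned, dynamic partition pruning cannot apply, so the optimizer returns before walkExprTree() ever touches what may be a very large IN() predicate. A rough, hypothetical sketch of the same early-exit idea (names are illustrative, not the Hive optimizer API):

        // Illustrative only; TableScanInfo and Predicate are hypothetical types.
        final class TableScanInfo {
            final boolean partitioned;
            TableScanInfo(boolean partitioned) { this.partitioned = partitioned; }
        }

        final class Predicate {
            final int inListSize;          // e.g. a huge IN (...) literal list
            Predicate(int inListSize) { this.inListSize = inListSize; }
        }

        public final class PruningFastPath {

            /** Returns true only when the (expensive) expression walk actually ran. */
            static boolean maybeWalk(TableScanInfo ts, Predicate pred) {
                if (ts == null) {
                    // the filter may sit on top of a reduce sink instead of a scan
                    return false;
                }
                if (!ts.partitioned) {
                    // unpartitioned table: dynamic partition pruning cannot apply
                    return false;
                }
                walkExprTree(pred);        // only now pay the cost of the walk
                return true;
            }

            private static void walkExprTree(Predicate pred) {
                // stand-in for the real walk; cost is proportional to the IN() list
                for (int i = 0; i < pred.inListSize; i++) { /* visit element i */ }
            }

            public static void main(String[] args) {
                Predicate bigIn = new Predicate(1_000_000);
                System.out.println(maybeWalk(null, bigIn));                       // false, skipped
                System.out.println(maybeWalk(new TableScanInfo(false), bigIn));   // false, skipped
                System.out.println(maybeWalk(new TableScanInfo(true), bigIn));    // true, walked
            }
        }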
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11629: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : fix the filter expressions for full outer join and right outer join (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5aa16ecb
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5aa16ecb
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5aa16ecb

    Branch: refs/heads/beeline-cli
    Commit: 5aa16ecb3aadbeb1770ae08f1f1d476503cbbb6e
    Parents: dcf21cd
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Sat Aug 29 23:48:09 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Sat Aug 29 23:48:09 2015 -0700

    ----------------------------------------------------------------------
      .../calcite/translator/HiveOpConverter.java | 22 +-
      .../clientpositive/cbo_rp_outer_join_ppr.q | 40 +
      .../cbo_rp_outer_join_ppr.q.java1.7.out | 855 +++++++++++++++++++
      3 files changed, 914 insertions(+), 3 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    index 1931880..9391952 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    @@ -995,7 +995,7 @@ public class HiveOpConverter {
         * to be expressed that way.
         */
        private static int updateExprNode(ExprNodeDesc expr, final Map<String, Byte> reversedExprs,
    - final Map<String, ExprNodeDesc> colExprMap) {
    + final Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
          int inputPos = -1;
          if (expr instanceof ExprNodeGenericFuncDesc) {
            ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
    @@ -1003,10 +1003,26 @@ public class HiveOpConverter {
            for (ExprNodeDesc functionChild : func.getChildren()) {
              if (functionChild instanceof ExprNodeColumnDesc) {
                String colRef = functionChild.getExprString();
    - inputPos = reversedExprs.get(colRef);
    + int pos = reversedExprs.get(colRef);
    + if (pos != -1) {
    + if (inputPos == -1) {
    + inputPos = pos;
    + } else if (inputPos != pos) {
    + throw new SemanticException(
    + "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
    + }
    + }
                newChildren.add(colExprMap.get(colRef));
              } else {
    - inputPos = updateExprNode(functionChild, reversedExprs, colExprMap);
    + int pos = updateExprNode(functionChild, reversedExprs, colExprMap);
    + if (pos != -1) {
    + if (inputPos == -1) {
    + inputPos = pos;
    + } else if (inputPos != pos) {
    + throw new SemanticException(
    + "UpdateExprNode is expecting only one position for join operator convert. But there are more than one.");
    + }
    + }
                newChildren.add(functionChild);
              }
            }
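
    The fix above makes updateExprNode merge the input positions found across all children of a join filter expression instead of keeping only the last one, and it fails fast when the children resolve to more than one join input. A compact, hypothetical rendering of that merge rule (stand-in classes, not the HiveOpConverter code itself):

        import java.util.Arrays;
        import java.util.List;

        // Hypothetical expression model; -1 means "no input position yet".
        abstract class SimpleExpr {
            abstract int resolveInputPos();
        }

        final class Literal extends SimpleExpr {
            @Override int resolveInputPos() { return -1; }   // constants bind to no input
        }

        final class ColumnRef extends SimpleExpr {
            final int inputPos;                  // which join input the column comes from
            ColumnRef(int inputPos) { this.inputPos = inputPos; }
            @Override int resolveInputPos() { return inputPos; }
        }

        final class FuncCall extends SimpleExpr {
            final List<SimpleExpr> children;
            FuncCall(SimpleExpr... children) { this.children = Arrays.asList(children); }

            @Override int resolveInputPos() {
                int inputPos = -1;
                for (SimpleExpr child : children) {
                    int pos = child.resolveInputPos();
                    if (pos == -1) {
                        continue;                // child carries no column reference
                    }
                    if (inputPos == -1) {
                        inputPos = pos;          // first position seen wins
                    } else if (inputPos != pos) {
                        // the same condition the patch reports as a SemanticException
                        throw new IllegalStateException(
                            "filter expression references more than one join input");
                    }
                }
                return inputPos;
            }
        }

        public final class FilterPosSketch {
            public static void main(String[] args) {
                // (b.ds = '2008-04-08') style predicate: column and constant -> input 1
                System.out.println(
                    new FuncCall(new ColumnRef(1), new Literal()).resolveInputPos());   // 1

                // a predicate mixing both join inputs is rejected
                try {
                    new FuncCall(new ColumnRef(0), new ColumnRef(1)).resolveInputPos();
                } catch (IllegalStateException e) {
                    System.out.println("rejected: " + e.getMessage());
                }
            }
        }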

    http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
    new file mode 100644
    index 0000000..8daf718
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q
    @@ -0,0 +1,40 @@
    +set hive.cbo.enable=true;
    +set hive.cbo.returnpath.hiveop=true;
    +set hive.optimize.ppd=true;
    +
    +-- SORT_QUERY_RESULTS
    +-- JAVA_VERSION_SPECIFIC_OUTPUT
    +
    +EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
    +
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25;
    +
    +EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';
    +
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08';

    http://git-wip-us.apache.org/repos/asf/hive/blob/5aa16ecb/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
    new file mode 100644
    index 0000000..c19b47a
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out
    @@ -0,0 +1,855 @@
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +-- JAVA_VERSION_SPECIFIC_OUTPUT
    +
    +EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +-- JAVA_VERSION_SPECIFIC_OUTPUT
    +
    +EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_FULLOUTERJOIN
    + TOK_TABREF
    + TOK_TABNAME
    + src
    + a
    + TOK_TABREF
    + TOK_TABNAME
    + srcpart
    + b
    + AND
    + =
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + =
    + .
    + TOK_TABLE_OR_COL
    + b
    + ds
    + '2008-04-08'
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + a
    + value
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + b
    + value
    + TOK_WHERE
    + AND
    + AND
    + AND
    + >
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + 10
    + <
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + 20
    + >
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + 15
    + <
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + 25
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Select Operator
    + expressions: key (type: string), value (type: string)
    + outputColumnNames: key, value
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: b
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Select Operator
    + expressions: key (type: string), value (type: string), ds (type: string)
    + outputColumnNames: key, value, ds
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string), ds (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: src
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.src
    + numFiles 1
    + numRows 500
    + rawDataSize 5312
    + serialization.ddl struct src { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.src
    + numFiles 1
    + numRows 500
    + rawDataSize 5312
    + serialization.ddl struct src { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.src
    + name: default.src
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=11
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-08
    + hr 11
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=12
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-08
    + hr 12
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=11
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-09
    + hr 11
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=12
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-09
    + hr 12
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    + Truncated Path -> Alias:
    + /src [a]
    + /srcpart/ds=2008-04-08/hr=11 [b]
    + /srcpart/ds=2008-04-08/hr=12 [b]
    + /srcpart/ds=2008-04-09/hr=11 [b]
    + /srcpart/ds=2008-04-09/hr=12 [b]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Outer Join 0 to 1
    + filter mappings:
    + 1 [0, 1]
    + filter predicates:
    + 0
    + 1 {(VALUE.ds = '2008-04-08')}
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + outputColumnNames: key, value, key0, value0
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) and (UDFToDouble(key0) > 15.0)) and (UDFToDouble(key0) < 25.0)) (type: boolean)
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
    + outputColumnNames: key, value, key0, value0
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns key,value,key0,value0
    + columns.types string:string:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key AND b.ds = '2008-04-08')
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +17 val_17 17 val_17
    +17 val_17 17 val_17
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +19 val_19 19 val_19
    +19 val_19 19 val_19
    +PREHOOK: query: EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN EXTENDED
    + FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
    +POSTHOOK: type: QUERY
    +ABSTRACT SYNTAX TREE:
    +
    +TOK_QUERY
    + TOK_FROM
    + TOK_FULLOUTERJOIN
    + TOK_TABREF
    + TOK_TABNAME
    + src
    + a
    + TOK_TABREF
    + TOK_TABNAME
    + srcpart
    + b
    + =
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + TOK_INSERT
    + TOK_DESTINATION
    + TOK_DIR
    + TOK_TMP_FILE
    + TOK_SELECT
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + a
    + value
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + TOK_SELEXPR
    + .
    + TOK_TABLE_OR_COL
    + b
    + value
    + TOK_WHERE
    + AND
    + AND
    + AND
    + AND
    + >
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + 10
    + <
    + .
    + TOK_TABLE_OR_COL
    + a
    + key
    + 20
    + >
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + 15
    + <
    + .
    + TOK_TABLE_OR_COL
    + b
    + key
    + 25
    + =
    + .
    + TOK_TABLE_OR_COL
    + b
    + ds
    + '2008-04-08'
    +
    +
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: a
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
    + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string), value (type: string)
    + outputColumnNames: key, value
    + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
    + tag: 0
    + value expressions: value (type: string)
    + auto parallelism: false
    + TableScan
    + alias: b
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + GatherStats: false
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string), value (type: string)
    + outputColumnNames: key, value
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + tag: 1
    + value expressions: value (type: string)
    + auto parallelism: false
    + Path -> Alias:
    +#### A masked pattern was here ####
    + Path -> Partition:
    +#### A masked pattern was here ####
    + Partition
    + base file name: src
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.src
    + numFiles 1
    + numRows 500
    + rawDataSize 5312
    + serialization.ddl struct src { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.src
    + numFiles 1
    + numRows 500
    + rawDataSize 5312
    + serialization.ddl struct src { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.src
    + name: default.src
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=11
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-08
    + hr 11
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    +#### A masked pattern was here ####
    + Partition
    + base file name: hr=12
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + partition values:
    + ds 2008-04-08
    + hr 12
    + properties:
    + COLUMN_STATS_ACCURATE true
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + numFiles 1
    + numRows 500
    + partition_columns ds/hr
    + partition_columns.types string:string
    + rawDataSize 5312
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + totalSize 5812
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + bucket_count -1
    + columns key,value
    + columns.comments 'default','default'
    + columns.types string:string
    +#### A masked pattern was here ####
    + name default.srcpart
    + partition_columns ds/hr
    + partition_columns.types string:string
    + serialization.ddl struct srcpart { string key, string value}
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +#### A masked pattern was here ####
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + name: default.srcpart
    + name: default.srcpart
    + Truncated Path -> Alias:
    + /src [a]
    + /srcpart/ds=2008-04-08/hr=11 [b]
    + /srcpart/ds=2008-04-08/hr=12 [b]
    + Needs Tagging: true
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Right Outer Join0 to 1
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + outputColumnNames: key, value, key0, value0
    + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + isSamplingPred: false
    + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean)
    + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string)
    + outputColumnNames: key, value, key0, value0
    + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + GlobalTableId: 0
    +#### A masked pattern was here ####
    + NumFilesPerFileSink: 1
    + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
    +#### A masked pattern was here ####
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + properties:
    + columns key,value,key0,value0
    + columns.types string:string:string:string
    + escape.delim \
    + hive.serialization.extend.additional.nesting.levels true
    + serialization.format 1
    + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + TotalFiles: 1
    + GatherStats: false
    + MultiFileSpray: false
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: FROM
    + src a
    + FULL OUTER JOIN
    + srcpart b
    + ON (a.key = b.key)
    + SELECT a.key, a.value, b.key, b.value
    + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +#### A masked pattern was here ####
    +17 val_17 17 val_17
    +17 val_17 17 val_17
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +18 val_18 18 val_18
    +19 val_19 19 val_19
    +19 val_19 19 val_19
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11670: Strip out password information from TezSessionState configuration (Hari Subramaniyan, reviewed by Vikram Dixit K)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78e70159
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78e70159
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78e70159

    Branch: refs/heads/beeline-cli
    Commit: 78e701590f1cb8b8b07a4871280a31f7c3d35034
    Parents: 5aa16ec
    Author: Hari Subramaniyan <harisankar@apache.org>
    Authored: Mon Aug 31 11:55:07 2015 -0700
    Committer: Hari Subramaniyan <harisankar@apache.org>
    Committed: Mon Aug 31 11:55:07 2015 -0700

    ----------------------------------------------------------------------
      .../java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java | 2 ++
      1 file changed, 2 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/78e70159/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    index 8555c6a..568ebbe 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.permission.FsPermission;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
    +import org.apache.hadoop.hive.ql.exec.Utilities;
      import org.apache.hadoop.hive.ql.session.SessionState;
      import org.apache.hadoop.hive.shims.ShimLoader;
      import org.apache.hadoop.hive.shims.Utils;
    @@ -165,6 +166,7 @@ public class TezSessionState {
          // generate basic tez config
          TezConfiguration tezConfig = new TezConfiguration(conf);
          tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString());
    + Utilities.stripHivePasswordDetails(tezConfig);

          if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
            int n = HiveConf.getIntVar(conf, ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
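
    For context, the one-line change above calls the existing Utilities.stripHivePasswordDetails helper on the freshly built TezConfiguration, so that password values from the HiveConf do not travel along with the rest of the job configuration into the Tez session. The sketch below is only a simplified illustration of that idea, not the actual Hive implementation: the class name PasswordStripper and the key-name heuristic are assumptions made for this example, and the real helper may well work from a specific list of known configuration variables instead.

        import org.apache.hadoop.conf.Configuration;

        import java.util.ArrayList;
        import java.util.List;
        import java.util.Map;

        // Hypothetical sketch (not Hive's Utilities code): drop any entry whose
        // key looks like a password from a Hadoop Configuration before the
        // configuration is handed off to another service.
        public class PasswordStripper {
          public static void stripPasswords(Configuration conf) {
            // Collect the keys first, then unset them, so we never mutate the
            // Configuration while iterating over it.
            List<String> sensitiveKeys = new ArrayList<String>();
            // Configuration implements Iterable<Map.Entry<String, String>>.
            for (Map.Entry<String, String> entry : conf) {
              if (entry.getKey().toLowerCase().contains("password")) {
                sensitiveKeys.add(entry.getKey());
              }
            }
            for (String key : sensitiveKeys) {
              conf.unset(key); // remove the sensitive value entirely
            }
          }
        }

    In the patch above the real call sits immediately after the TezConfiguration is constructed from the HiveConf (and after the staging directory is set), so everything downstream in TezSessionState only ever sees the sanitized copy.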
  • Xuf at Sep 9, 2015 at 7:08 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/7281a460/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
    index 71d8f41..566b451 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
    @@ -58,10 +58,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
      POSTHOOK: Output: default@ss
      POSTHOOK: Lineage: ss.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: ss.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
    -POSTHOOK: Lineage: ss.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: ss.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: ss.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
      POSTHOOK: Lineage: ss.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
    -POSTHOOK: Lineage: ss.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: ss.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
      PREHOOK: query: INSERT OVERWRITE TABLE sr
      SELECT x.key,x.value,y.key,y.value,z.key,z.value
      FROM src1 x
    @@ -86,10 +86,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
      POSTHOOK: Output: default@sr
      POSTHOOK: Lineage: sr.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: sr.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
    -POSTHOOK: Lineage: sr.k3 EXPRESSION [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: sr.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: sr.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
      POSTHOOK: Lineage: sr.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
    -POSTHOOK: Lineage: sr.v3 EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: sr.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
      PREHOOK: query: INSERT OVERWRITE TABLE cs
      SELECT x.key,x.value,y.key,y.value,z.key,z.value
      FROM src1 x
    @@ -115,10 +115,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
      POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
      POSTHOOK: Output: default@cs
      POSTHOOK: Lineage: cs.k1 SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
    -POSTHOOK: Lineage: cs.k2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: cs.k2 SIMPLE [(src)y.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: cs.k3 SIMPLE [(srcpart)z.FieldSchema(name:key, type:string, comment:default), ]
      POSTHOOK: Lineage: cs.v1 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
    -POSTHOOK: Lineage: cs.v2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: cs.v2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
      POSTHOOK: Lineage: cs.v3 SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
      PREHOOK: query: ANALYZE TABLE ss COMPUTE STATISTICS
      PREHOOK: type: QUERY
  • Xuf at Sep 9, 2015 at 7:08 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index 4d797f2..7595c3e 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -2713,62 +2713,62 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 6
    - File Output Operator [FS_50]
    + File Output Operator [FS_53]
                  compressed:false
                  Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Select Operator [SEL_49]
    + Select Operator [SEL_52]
    outputColumnNames:["_col0","_col1","_col2"]
    Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 5 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_48]
    + Reduce Output Operator [RS_51]
                        key expressions:_col1 (type: bigint), _col0 (type: string)
                        sort order:++
                        Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions:_col2 (type: bigint)
    - Group By Operator [GBY_46]
    + Group By Operator [GBY_49]
    aggregations:["count(VALUE._col0)"]
    keys:KEY._col0 (type: string), KEY._col1 (type: bigint)
    outputColumnNames:["_col0","_col1","_col2"]
    Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 4 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_45]
    + Reduce Output Operator [RS_48]
                              key expressions:_col0 (type: string), _col1 (type: bigint)
                              Map-reduce partition columns:_col0 (type: string), _col1 (type: bigint)
                              sort order:++
                              Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                              value expressions:_col2 (type: bigint)
    - Group By Operator [GBY_44]
    + Group By Operator [GBY_47]
                                 aggregations:["count()"]
                                 keys:_col0 (type: string), _col1 (type: bigint)
                                 outputColumnNames:["_col0","_col1","_col2"]
                                 Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
    - Merge Join Operator [MERGEJOIN_63]
    + Merge Join Operator [MERGEJOIN_66]
    condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}]
    keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"}
    outputColumnNames:["_col0","_col1"]
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 10 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_41]
    + | Reduce Output Operator [RS_44]
    key expressions:_col0 (type: string)
    Map-reduce partition columns:_col0 (type: string)
    sort order:+
    - | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
    - | Group By Operator [GBY_35]
    + | Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
    + | Group By Operator [GBY_38]
    keys:_col0 (type: string)
    outputColumnNames:["_col0"]
    - | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
    - | Select Operator [SEL_31]
    + | Statistics:Num rows: 3 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
    + | Select Operator [SEL_34]
    outputColumnNames:["_col0"]
    - | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_61]
    - | predicate:key is not null (type: boolean)
    - | Statistics:Num rows: 18 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
    - | TableScan [TS_30]
    + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
    + | Filter Operator [FIL_64]
    + | predicate:(UDFToDouble(key) > 0.0) (type: boolean)
    + | Statistics:Num rows: 6 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
    + | TableScan [TS_32]
    alias:cbo_t3
    Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 3 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_37]
    + | Reduce Output Operator [RS_40]
    key expressions:_col0 (type: string)
    Map-reduce partition columns:_col0 (type: string)
    sort order:+
    @@ -2783,16 +2783,16 @@ Stage-0
    sort order:-+
    Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col0 (type: string)
    - | Filter Operator [FIL_57]
    + | Filter Operator [FIL_60]
    predicate:(((_col1 + 1) >= 0) and ((_col1 > 0) or (UDFToDouble(_col0) >= 0.0))) (type: boolean)
    Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
    Select Operator [SEL_9]
    outputColumnNames:["_col0","_col1","_col2","_col3"]
    Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_58]
    + | Filter Operator [FIL_61]
    predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean)
    Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
    - | Select Operator [SEL_62]
    + | Select Operator [SEL_65]
    outputColumnNames:["_col1","_col2","_col3"]
    Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE
    Group By Operator [GBY_8]
    @@ -2815,19 +2815,19 @@ Stage-0
    Select Operator [SEL_2]
    outputColumnNames:["_col0","_col1","_col2"]
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_59]
    + | Filter Operator [FIL_62]
    predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
    Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:cbo_t1
    Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 9 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_39]
    + Reduce Output Operator [RS_42]
                                       key expressions:_col0 (type: string)
                                       Map-reduce partition columns:_col0 (type: string)
                                       sort order:+
                                       Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_33]
    + Group By Operator [GBY_36]
                                          keys:_col0 (type: string)
                                          outputColumnNames:["_col0"]
                                          Statistics:Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
    @@ -2862,8 +2862,8 @@ Stage-0
                                                            Select Operator [SEL_19]
                                                               outputColumnNames:["_col0","_col1","_col2"]
                                                               Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_60]
    - predicate:(((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and key is not null) (type: boolean)
    + Filter Operator [FIL_63]
    + predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean)
                                                                  Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                                                                  TableScan [TS_17]
                                                                     alias:cbo_t2
    @@ -3181,47 +3181,47 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 2
    - File Output Operator [FS_14]
    + File Output Operator [FS_16]
                  compressed:false
    - Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Merge Join Operator [MERGEJOIN_19]
    + Merge Join Operator [MERGEJOIN_21]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 1 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_9]
    + | Reduce Output Operator [RS_11]
    key expressions:_col1 (type: string), _col0 (type: string)
    Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
    sort order:++
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    Select Operator [SEL_2]
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_17]
    - | predicate:(value is not null and key is not null) (type: boolean)
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    + | Filter Operator [FIL_19]
    + | predicate:((value > 'val_9') and key is not null) (type: boolean)
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:b
    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 3 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_11]
    + Reduce Output Operator [RS_13]
                        key expressions:_col0 (type: string), _col1 (type: string)
                        Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
                        sort order:++
                        Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_7]
    + Group By Operator [GBY_9]
                           keys:_col0 (type: string), _col1 (type: string)
                           outputColumnNames:["_col0","_col1"]
                           Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_5]
    + Select Operator [SEL_7]
                              outputColumnNames:["_col0","_col1"]
                              Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_18]
    + Filter Operator [FIL_20]
                                 predicate:((value > 'val_9') and key is not null) (type: boolean)
                                 Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    - TableScan [TS_3]
    + TableScan [TS_5]
                                    alias:b
                                    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE

    @@ -3253,47 +3253,47 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 2
    - File Output Operator [FS_14]
    + File Output Operator [FS_16]
                  compressed:false
    - Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Merge Join Operator [MERGEJOIN_19]
    + Merge Join Operator [MERGEJOIN_21]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    keys:{"1":"_col0 (type: string), _col1 (type: string)","0":"_col1 (type: string), _col0 (type: string)"}
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 1 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_9]
    + | Reduce Output Operator [RS_11]
    key expressions:_col1 (type: string), _col0 (type: string)
    Map-reduce partition columns:_col1 (type: string), _col0 (type: string)
    sort order:++
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    Select Operator [SEL_2]
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_17]
    - | predicate:(value is not null and key is not null) (type: boolean)
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    + | Filter Operator [FIL_19]
    + | predicate:((value > 'val_9') and key is not null) (type: boolean)
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:b
    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 3 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_11]
    + Reduce Output Operator [RS_13]
                        key expressions:_col0 (type: string), _col1 (type: string)
                        Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
                        sort order:++
                        Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_7]
    + Group By Operator [GBY_9]
                           keys:_col0 (type: string), _col1 (type: string)
                           outputColumnNames:["_col0","_col1"]
                           Statistics:Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_5]
    + Select Operator [SEL_7]
                              outputColumnNames:["_col0","_col1"]
                              Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_18]
    + Filter Operator [FIL_20]
                                 predicate:((value > 'val_9') and key is not null) (type: boolean)
                                 Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    - TableScan [TS_3]
    + TableScan [TS_5]
                                    alias:b
                                    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE

    @@ -3315,48 +3315,48 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 2
    - File Output Operator [FS_14]
    + File Output Operator [FS_16]
                  compressed:false
    - Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Merge Join Operator [MERGEJOIN_19]
    + Merge Join Operator [MERGEJOIN_21]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 1 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_9]
    + | Reduce Output Operator [RS_11]
    key expressions:_col0 (type: string)
    Map-reduce partition columns:_col0 (type: string)
    sort order:+
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col1 (type: string)
    Select Operator [SEL_2]
    outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_17]
    - | predicate:key is not null (type: boolean)
    - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    + | Filter Operator [FIL_19]
    + | predicate:(key > '9') (type: boolean)
    + | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:src_cbo
    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 3 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_11]
    + Reduce Output Operator [RS_13]
                        key expressions:_col0 (type: string)
                        Map-reduce partition columns:_col0 (type: string)
                        sort order:+
                        Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_7]
    + Group By Operator [GBY_9]
                           keys:_col0 (type: string)
                           outputColumnNames:["_col0"]
                           Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_5]
    + Select Operator [SEL_7]
                              outputColumnNames:["_col0"]
                              Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_18]
    + Filter Operator [FIL_20]
                                 predicate:(key > '9') (type: boolean)
                                 Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - TableScan [TS_3]
    + TableScan [TS_5]
                                    alias:src_cbo
                                    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE

    @@ -3382,92 +3382,92 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 3
    - File Output Operator [FS_26]
    + File Output Operator [FS_28]
                  compressed:false
                  Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Select Operator [SEL_25]
    + Select Operator [SEL_27]
                     outputColumnNames:["_col0","_col1"]
                     Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
    - Merge Join Operator [MERGEJOIN_36]
    + Merge Join Operator [MERGEJOIN_38]
    condition map:[{"":"Inner Join 0 to 1"}]
    keys:{"1":"_col0 (type: int)","0":"_col1 (type: int)"}
    outputColumnNames:["_col1","_col2"]
    Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 2 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_21]
    + | Reduce Output Operator [RS_23]
    key expressions:_col1 (type: int)
    Map-reduce partition columns:_col1 (type: int)
    sort order:+
    Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col2 (type: int)
    - | Merge Join Operator [MERGEJOIN_35]
    + | Merge Join Operator [MERGEJOIN_37]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    - | | keys:{"1":"_col0 (type: int), _col1 (type: int)","0":"_col0 (type: int), _col3 (type: int)"}
    + | | keys:{"1":"_col0 (type: int)","0":"_col0 (type: int)"}
    outputColumnNames:["_col1","_col2"]
    Statistics:Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 1 [SIMPLE_EDGE]
    - | | Reduce Output Operator [RS_16]
    - | | key expressions:_col0 (type: int), _col3 (type: int)
    - | | Map-reduce partition columns:_col0 (type: int), _col3 (type: int)
    - | | sort order:++
    + | | Reduce Output Operator [RS_18]
    + | | key expressions:_col0 (type: int)
    + | | Map-reduce partition columns:_col0 (type: int)
    + | | sort order:+
    Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col1 (type: int), _col2 (type: int)
    Select Operator [SEL_2]
    - | | outputColumnNames:["_col0","_col1","_col2","_col3"]
    + | | outputColumnNames:["_col0","_col1","_col2"]
    Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
    - | | Filter Operator [FIL_32]
    + | | Filter Operator [FIL_34]
    predicate:(((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
    Statistics:Num rows: 16 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:lineitem
    Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 4 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_18]
    - | key expressions:_col0 (type: int), _col1 (type: int)
    - | Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
    - | sort order:++
    - | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
    - | Group By Operator [GBY_14]
    - | keys:_col0 (type: int), _col1 (type: int)
    - | outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
    + | Reduce Output Operator [RS_20]
    + | key expressions:_col0 (type: int)
    + | Map-reduce partition columns:_col0 (type: int)
    + | sort order:+
    + | Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
    + | Group By Operator [GBY_16]
    + | keys:_col0 (type: int)
    + | outputColumnNames:["_col0"]
    + | Statistics:Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
    Select Operator [SEL_5]
    - | outputColumnNames:["_col0","_col1"]
    - | Statistics:Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_33]
    - | predicate:(((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + | outputColumnNames:["_col0"]
    + | Statistics:Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
    + | Filter Operator [FIL_35]
    + | predicate:(((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
    Statistics:Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_3]
    alias:lineitem
    Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 6 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_23]
    + Reduce Output Operator [RS_25]
                           key expressions:_col0 (type: int)
                           Map-reduce partition columns:_col0 (type: int)
                           sort order:+
                           Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_11]
    + Group By Operator [GBY_13]
    keys:KEY._col0 (type: int)
    outputColumnNames:["_col0"]
    Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 5 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_10]
    + Reduce Output Operator [RS_12]
                                 key expressions:_col0 (type: int)
                                 Map-reduce partition columns:_col0 (type: int)
                                 sort order:+
                                 Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_9]
    + Group By Operator [GBY_11]
                                    keys:_col0 (type: int)
                                    outputColumnNames:["_col0"]
                                    Statistics:Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_7]
    + Select Operator [SEL_9]
                                       outputColumnNames:["_col0"]
                                       Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_34]
    + Filter Operator [FIL_36]
                                          predicate:l_partkey is not null (type: boolean)
                                          Statistics:Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
    - TableScan [TS_6]
    + TableScan [TS_8]
                                             alias:lineitem
                                             Statistics:Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE

    @@ -3496,125 +3496,125 @@ Stage-0
            limit:-1
            Stage-1
               Reducer 4
    - File Output Operator [FS_36]
    + File Output Operator [FS_38]
                  compressed:false
                  Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
                  table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
    - Merge Join Operator [MERGEJOIN_49]
    + Merge Join Operator [MERGEJOIN_51]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"}
    outputColumnNames:["_col0","_col1","_col2"]
    Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 3 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_31]
    + | Reduce Output Operator [RS_33]
    key expressions:_col2 (type: bigint)
    Map-reduce partition columns:_col2 (type: bigint)
    sort order:+
    - | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col0 (type: string), _col1 (type: string)
    - | Filter Operator [FIL_42]
    + | Filter Operator [FIL_44]
    predicate:_col2 is not null (type: boolean)
    - | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
    - | Group By Operator [GBY_18]
    + | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
    + | Group By Operator [GBY_20]
    aggregations:["count(VALUE._col0)"]
    keys:KEY._col0 (type: string), KEY._col1 (type: string)
    outputColumnNames:["_col0","_col1","_col2"]
    - | | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
    + | | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 2 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_17]
    + | Reduce Output Operator [RS_19]
    key expressions:_col0 (type: string), _col1 (type: string)
    Map-reduce partition columns:_col0 (type: string), _col1 (type: string)
    sort order:++
    - | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
    + | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col2 (type: bigint)
    - | Group By Operator [GBY_16]
    + | Group By Operator [GBY_18]
    aggregations:["count()"]
    keys:_col0 (type: string), _col1 (type: string)
    outputColumnNames:["_col0","_col1","_col2"]
    - | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE
    - | Merge Join Operator [MERGEJOIN_48]
    + | Statistics:Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
    + | Merge Join Operator [MERGEJOIN_50]
    condition map:[{"":"Left Semi Join 0 to 1"}]
    keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"}
    outputColumnNames:["_col0","_col1"]
    - | | Statistics:Num rows: 168 Data size: 29904 Basic stats: COMPLETE Column stats: COMPLETE
    + | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 1 [SIMPLE_EDGE]
    - | | Reduce Output Operator [RS_11]
    + | | Reduce Output Operator [RS_13]
    key expressions:_col0 (type: string)
    Map-reduce partition columns:_col0 (type: string)
    sort order:+
    - | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    value expressions:_col1 (type: string)
    Select Operator [SEL_2]
    outputColumnNames:["_col0","_col1"]
    - | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    - | | Filter Operator [FIL_43]
    - | | predicate:key is not null (type: boolean)
    - | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
    + | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    + | | Filter Operator [FIL_45]
    + | | predicate:(key > '8') (type: boolean)
    + | | Statistics:Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
    TableScan [TS_0]
    alias:b
    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 5 [SIMPLE_EDGE]
    - | Reduce Output Operator [RS_13]
    + | Reduce Output Operator [RS_15]
    key expressions:_col0 (type: string)
    Map-reduce partition columns:_col0 (type: string)
    sort order:+
    Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
    - | Group By Operator [GBY_9]
    + | Group By Operator [GBY_11]
    keys:_col0 (type: string)
    outputColumnNames:["_col0"]
    Statistics:Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE
    - | Select Operator [SEL_7]
    + | Select Operator [SEL_9]
    outputColumnNames:["_col0"]
    Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - | Filter Operator [FIL_44]
    + | Filter Operator [FIL_46]
    predicate:(key > '8') (type: boolean)
    Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - | TableScan [TS_5]
    + | TableScan [TS_7]
    alias:b
    Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
    <-Reducer 7 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_33]
    + Reduce Output Operator [RS_35]
                        key expressions:_col0 (type: bigint)
                        Map-reduce partition columns:_col0 (type: bigint)
                        sort order:+
                        Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_29]
    + Group By Operator [GBY_31]
                           keys:_col0 (type: bigint)
                           outputColumnNames:["_col0"]
                           Statistics:Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_27]
    + Select Operator [SEL_29]
                              outputColumnNames:["_col0"]
                              Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_45]
    + Filter Operator [FIL_47]
                                 predicate:_col1 is not null (type: boolean)
                                 Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_47]
    + Select Operator [SEL_49]
                                    outputColumnNames:["_col1"]
                                    Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
    - Group By Operator [GBY_26]
    + Group By Operator [GBY_28]
    aggregations:["count(VALUE._col0)"]
    keys:KEY._col0 (type: string)
    outputColumnNames:["_col0","_col1"]
    Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
    <-Map 6 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_25]
    + Reduce Output Operator [RS_27]
                                          key expressions:_col0 (type: string)
                                          Map-reduce partition columns:_col0 (type: string)
                                          sort order:+
                                          Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
                                          value expressions:_col1 (type: bigint)
    - Group By Operator [GBY_24]
    + Group By Operator [GBY_26]
                                             aggregations:["count()"]
                                             keys:_col0 (type: string)
                                             outputColumnNames:["_col0","_col1"]
                                             Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_22]
    + Select Operator [SEL_24]
                                                outputColumnNames:["_col0"]
                                                Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - Filter Operator [FIL_46]
    + Filter Operator [FIL_48]
                                                   predicate:(key > '9') (type: boolean)
                                                   Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
    - TableScan [TS_20]
    + TableScan [TS_22]
                                                      alias:b
                                                      Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE


    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
    index 503a5e9..41aa048 100644
    --- a/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
    +++ b/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
    @@ -41,17 +41,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (value is not null and key is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((value > 'val_9') and key is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col1 (type: string), _col0 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -83,10 +83,10 @@ STAGE PLANS:
                        0 _col1 (type: string), _col0 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/subquery_in.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/subquery_in.q.out b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
    index 38f4bc6..2b1237b 100644
    --- a/ql/src/test/results/clientpositive/tez/subquery_in.q.out
    +++ b/ql/src/test/results/clientpositive/tez/subquery_in.q.out
    @@ -31,17 +31,17 @@ STAGE PLANS:
                        alias: src
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key > '9') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string)
                              sort order: +
                              Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                              value expressions: _col1 (type: string)
              Map 3
                  Map Operator Tree:
    @@ -74,10 +74,10 @@ STAGE PLANS:
                        0 _col0 (type: string)
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -149,17 +149,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string), _col1 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -191,10 +191,10 @@ STAGE PLANS:
                        0 _col0 (type: string), _col1 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -627,17 +627,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string), _col1 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -669,10 +669,10 @@ STAGE PLANS:
                        0 _col0 (type: string), _col1 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
    index af80260..b9d601a 100644
    --- a/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vector_inner_join.q.out
    @@ -150,11 +150,11 @@ STAGE PLANS:
                              outputColumnNames: _col0
                              input vertices:
                                1 Map 2
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                              HybridGraceHashJoin: true
                              File Output Operator
                                compressed: false
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                                table:
                                    input format: org.apache.hadoop.mapred.TextInputFormat
                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -166,22 +166,22 @@ STAGE PLANS:
                        alias: t1
                        Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: a is not null (type: boolean)
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + predicate: (a > 2) (type: boolean)
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: a (type: int)
                            outputColumnNames: _col0
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
                              keys: _col0 (type: int)
                              mode: hash
                              outputColumnNames: _col0
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                              Reduce Output Operator
                                key expressions: _col0 (type: int)
                                sort order: +
                                Map-reduce partition columns: _col0 (type: int)
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                  Execution mode: vectorized

        Stage: Stage-0

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
    index 29b86bd..e7fec82 100644
    --- a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out
    @@ -202,15 +202,15 @@ STAGE PLANS:
                          predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
    - outputColumnNames: _col0, _col1, _col2, _col3
    + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
    + outputColumnNames: _col0, _col1, _col2
                            Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                            Map Join Operator
                              condition map:
                                   Left Semi Join 0 to 1
                              keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                              outputColumnNames: _col1, _col2
                              input vertices:
                                1 Map 2
    @@ -228,21 +228,21 @@ STAGE PLANS:
                        alias: lineitem
                        Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_linenumber (type: int)
    - outputColumnNames: _col0, _col1
    + expressions: l_orderkey (type: int)
    + outputColumnNames: _col0
                            Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
    - keys: _col0 (type: int), _col1 (type: int)
    + keys: _col0 (type: int)
                              mode: hash
    - outputColumnNames: _col0, _col1
    + outputColumnNames: _col0
                              Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                              Reduce Output Operator
    - key expressions: _col0 (type: int), _col1 (type: int)
    - sort order: ++
    - Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
                                Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_inner_join.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vector_inner_join.q.out b/ql/src/test/results/clientpositive/vector_inner_join.q.out
    index 3e8d2f4..4775ae9 100644
    --- a/ql/src/test/results/clientpositive/vector_inner_join.q.out
    +++ b/ql/src/test/results/clientpositive/vector_inner_join.q.out
    @@ -136,17 +136,17 @@ STAGE PLANS:
                  alias: t1
                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: a is not null (type: boolean)
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + predicate: (a > 2) (type: boolean)
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: a (type: int)
                      outputColumnNames: _col0
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                        HashTable Sink Operator
                          keys:
                            0 _col0 (type: int)
    @@ -172,10 +172,10 @@ STAGE PLANS:
                          0 _col0 (type: int)
                          1 _col0 (type: int)
                        outputColumnNames: _col0
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
                          compressed: false
    - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                          table:
                              input format: org.apache.hadoop.mapred.TextInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
    index 16916cb..fdd7ea8 100644
    --- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
    +++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
    @@ -465,30 +465,30 @@ STAGE PLANS:
        Stage: Stage-11
          Map Reduce Local Work
            Alias -> Map Local Tables:
    - $hdt$_1:lineitem
    + $hdt$_1:$hdt$_1:lineitem
                Fetch Operator
                  limit: -1
            Alias -> Map Local Operator Tree:
    - $hdt$_1:lineitem
    + $hdt$_1:$hdt$_1:lineitem
                TableScan
                  alias: lineitem
                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
    - expressions: l_orderkey (type: int), l_linenumber (type: int)
    - outputColumnNames: _col0, _col1
    + expressions: l_orderkey (type: int)
    + outputColumnNames: _col0
                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
    - keys: _col0 (type: int), _col1 (type: int)
    + keys: _col0 (type: int)
                        mode: hash
    - outputColumnNames: _col0, _col1
    + outputColumnNames: _col0
                        Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                        HashTable Sink Operator
                          keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)

        Stage: Stage-8
          Map Reduce
    @@ -500,15 +500,15 @@ STAGE PLANS:
                    predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
    - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
    - outputColumnNames: _col0, _col1, _col2, _col3
    + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
    + outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Semi Join 0 to 1
                        keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                        outputColumnNames: _col1, _col2
                        Statistics: Num rows: 14 Data size: 1714 Basic stats: COMPLETE Column stats: NONE
                        File Output Operator
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11604: Hive returns wrong results in some queries with a PTF function (Yongzhi Chen via Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/308ae90a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/308ae90a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/308ae90a

    Branch: refs/heads/beeline-cli
    Commit: 308ae90aa396a2d99660f6ccf931c031ce9aa8a1
    Parents: 3ff3c6f
    Author: Szehon Ho <szehon@cloudera.com>
    Authored: Wed Sep 2 11:49:04 2015 -0700
    Committer: Szehon Ho <szehon@cloudera.com>
    Committed: Wed Sep 2 11:49:04 2015 -0700

    ----------------------------------------------------------------------
      .../ql/optimizer/IdentityProjectRemover.java | 15 +
      .../queries/clientpositive/ptfgroupbyjoin.q | 61 +++
      .../results/clientpositive/ptfgroupbyjoin.q.out | 519 +++++++++++++++++++
      .../clientpositive/tez/explainuser_1.q.out | 31 +-
      4 files changed, 612 insertions(+), 14 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
    index e3d3ce6..135b90b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
    @@ -30,8 +30,10 @@ import com.google.common.collect.Iterators;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.ql.exec.FilterOperator;
      import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
      import org.apache.hadoop.hive.ql.exec.Operator;
    +import org.apache.hadoop.hive.ql.exec.PTFOperator;
      import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
      import org.apache.hadoop.hive.ql.exec.SelectOperator;
      import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
    @@ -111,6 +113,19 @@ public class IdentityProjectRemover implements Transform {
              // For RS-SEL-RS case. reducer operator in reducer task cannot be null in task compiler
              return null;
            }
    + List<Operator<? extends OperatorDesc>> ancestorList = new ArrayList<Operator<? extends OperatorDesc>>();
    + ancestorList.addAll(sel.getParentOperators());
    + while (!ancestorList.isEmpty()) {
    + Operator<? extends OperatorDesc> curParent = ancestorList.remove(0);
    + // PTF needs a SelectOp.
    + if ((curParent instanceof PTFOperator)) {
    + return null;
    + }
    + if ((curParent instanceof FilterOperator) && curParent.getParentOperators() != null) {
    + ancestorList.addAll(curParent.getParentOperators());
    + }
    + }
    +
            if(sel.isIdentitySelect()) {
              parent.removeChildAndAdoptItsChildren(sel);
              LOG.debug("Identity project remover optimization removed : " + sel);

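    A standalone illustration of the guard added above: the new loop walks up from the
    SELECT through any FILTER operators and refuses to remove the projection when a PTF
    operator is found among its ancestors. The sketch below uses simplified, hypothetical
    stand-in classes (Op, PtfOp, FilterOp), not Hive's real Operator hierarchy; it is only
    meant to make the traversal easy to follow.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical stand-ins for Hive's Operator, PTFOperator and FilterOperator.
    class Op {
        final List<Op> parents = new ArrayList<>();
        Op withParents(Op... ps) { for (Op p : ps) parents.add(p); return this; }
    }
    class PtfOp extends Op {}
    class FilterOp extends Op {}

    public class PtfGuardSketch {
        // True when a PTF operator feeds the given SELECT, possibly through
        // intervening FILTER operators; in that case the identity SELECT must be
        // kept, mirroring the early "return null" in the patch above.
        static boolean feedsFromPtf(Op select) {
            List<Op> toVisit = new ArrayList<>(select.parents);
            while (!toVisit.isEmpty()) {
                Op cur = toVisit.remove(0);
                if (cur instanceof PtfOp) {
                    return true;                  // PTF needs the SelectOp below it
                }
                if (cur instanceof FilterOp) {
                    toVisit.addAll(cur.parents);  // keep walking through filters only
                }
            }
            return false;
        }

        public static void main(String[] args) {
            Op ptf = new PtfOp();
            Op filter = new FilterOp().withParents(ptf);
            Op select = new Op().withParents(filter);
            System.out.println(feedsFromPtf(select)); // true -> keep the SELECT
        }
    }

    With this guard in place, the SELECT that sits between the PTF operator and the join
    is retained, which is the behavior the new ptfgroupbyjoin.q test below locks in.
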
    http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
    new file mode 100644
    index 0000000..61d034e
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/ptfgroupbyjoin.q
    @@ -0,0 +1,61 @@
    +create table tlb1 (id int, fkey int, val string);
    +create table tlb2 (fid int, name string);
    +insert into table tlb1 values(100,1,'abc');
    +insert into table tlb1 values(200,1,'efg');
    +insert into table tlb2 values(1, 'key1');
    +
    +explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +explain
    +select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +
    +set hive.optimize.ppd=false;
    +
    +explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey;
    +
    +

    http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
    new file mode 100644
    index 0000000..9368df9
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
    @@ -0,0 +1,519 @@
    +PREHOOK: query: create table tlb1 (id int, fkey int, val string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@tlb1
    +POSTHOOK: query: create table tlb1 (id int, fkey int, val string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@tlb1
    +PREHOOK: query: create table tlb2 (fid int, name string)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@tlb2
    +POSTHOOK: query: create table tlb2 (fid int, name string)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@tlb2
    +PREHOOK: query: insert into table tlb1 values(100,1,'abc')
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__1
    +PREHOOK: Output: default@tlb1
    +POSTHOOK: query: insert into table tlb1 values(100,1,'abc')
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__1
    +POSTHOOK: Output: default@tlb1
    +POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    +POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
    +PREHOOK: query: insert into table tlb1 values(200,1,'efg')
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__2
    +PREHOOK: Output: default@tlb1
    +POSTHOOK: query: insert into table tlb1 values(200,1,'efg')
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__2
    +POSTHOOK: Output: default@tlb1
    +POSTHOOK: Lineage: tlb1.fkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    +POSTHOOK: Lineage: tlb1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +POSTHOOK: Lineage: tlb1.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
    +PREHOOK: query: insert into table tlb2 values(1, 'key1')
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__3
    +PREHOOK: Output: default@tlb2
    +POSTHOOK: query: insert into table tlb2 values(1, 'key1')
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__3
    +POSTHOOK: Output: default@tlb2
    +POSTHOOK: Lineage: tlb2.fid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +POSTHOOK: Lineage: tlb2.name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    +PREHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tlb1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: id (type: int), fkey (type: int)
    + outputColumnNames: id, fkey
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + keys: id (type: int), fkey (type: int)
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: int)
    + sort order: ++
    + Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Operator Tree:
    + Group By Operator
    + keys: KEY._col0 (type: int), KEY._col1 (type: int)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + PTF Operator
    + Function definitions:
    + Input definition
    + input alias: ptf_0
    + output shape: _col0: int, _col1: int
    + type: WINDOWING
    + Windowing table definition
    + input alias: ptf_1
    + name: windowingtablefunction
    + order by: _col0, _col1
    + partition by: _col0, _col1
    + raw input shape:
    + window functions:
    + window function definition
    + alias: row_number_window_0
    + name: row_number
    + window function: GenericUDAFRowNumberEvaluator
    + window frame: PRECEDING(MAX)~FOLLOWING(MAX)
    + isPivotResult: true
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: _col1 is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col1 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col1 (type: int)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: int)
    + TableScan
    + alias: aaa
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: fid is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: fid (type: int)
    + sort order: +
    + Map-reduce partition columns: fid (type: int)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + value expressions: name (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col1 (type: int)
    + 1 fid (type: int)
    + outputColumnNames: _col0, _col1, _col4
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tlb1
    +PREHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tlb1
    +POSTHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +200 1 key1
    +100 1 key1
    +PREHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tlb1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: id (type: int), fkey (type: int)
    + outputColumnNames: id, fkey
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + keys: id (type: int), fkey (type: int)
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: int)
    + sort order: ++
    + Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Operator Tree:
    + Group By Operator
    + keys: KEY._col0 (type: int), KEY._col1 (type: int)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + PTF Operator
    + Function definitions:
    + Input definition
    + input alias: ptf_0
    + output shape: _col0: int, _col1: int
    + type: WINDOWING
    + Windowing table definition
    + input alias: ptf_1
    + name: windowingtablefunction
    + order by: _col0, _col1
    + partition by: _col0, _col1
    + raw input shape:
    + window functions:
    + window function definition
    + alias: row_number_window_0
    + name: row_number
    + window function: GenericUDAFRowNumberEvaluator
    + window frame: PRECEDING(MAX)~FOLLOWING(MAX)
    + isPivotResult: true
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: _col1 is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int), row_number_window_0 (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col1 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col1 (type: int)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: int), _col2 (type: int)
    + TableScan
    + alias: aaa
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: fid is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: fid (type: int)
    + sort order: +
    + Map-reduce partition columns: fid (type: int)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + value expressions: name (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col1 (type: int)
    + 1 fid (type: int)
    + outputColumnNames: _col0, _col1, _col2, _col4
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string), _col2 (type: int)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tlb1
    +PREHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name, ddd.rnum
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tlb1
    +POSTHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +200 1 key1 1
    +100 1 key1 1
    +PREHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tlb1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: id (type: int), fkey (type: int)
    + outputColumnNames: id, fkey
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + keys: id (type: int), fkey (type: int)
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: int)
    + sort order: ++
    + Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE
    + Reduce Operator Tree:
    + Group By Operator
    + keys: KEY._col0 (type: int), KEY._col1 (type: int)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + PTF Operator
    + Function definitions:
    + Input definition
    + input alias: ptf_0
    + output shape: _col0: int, _col1: int
    + type: WINDOWING
    + Windowing table definition
    + input alias: ptf_1
    + name: windowingtablefunction
    + order by: _col0, _col1
    + partition by: _col0, _col1
    + raw input shape:
    + window functions:
    + window function definition
    + alias: row_number_window_0
    + name: row_number
    + window function: GenericUDAFRowNumberEvaluator
    + window frame: PRECEDING(MAX)~FOLLOWING(MAX)
    + isPivotResult: true
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: _col1 is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col1 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col1 (type: int)
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: int)
    + TableScan
    + alias: aaa
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: fid is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: fid (type: int)
    + sort order: +
    + Map-reduce partition columns: fid (type: int)
    + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
    + value expressions: name (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col1 (type: int)
    + 1 fid (type: int)
    + outputColumnNames: _col0, _col1, _col4
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: string)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tlb1
    +PREHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +POSTHOOK: query: select ddd.id, ddd.fkey, aaa.name
    +from (
    + select id, fkey,
    + row_number() over (partition by id, fkey) as rnum
    + from tlb1 group by id, fkey
    + ) ddd
    +inner join tlb2 aaa on aaa.fid = ddd.fkey
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tlb1
    +POSTHOOK: Input: default@tlb2
    +#### A masked pattern was here ####
    +200 1 key1
    +100 1 key1

    http://git-wip-us.apache.org/repos/asf/hive/blob/308ae90a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    index 9756b0c..4d797f2 100644
    --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
    @@ -6967,22 +6967,25 @@ Stage-0
                                    Map-reduce partition columns:rand() (type: double)
                                    sort order:+++
                                    Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
    - PTF Operator [PTF_3]
    - Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
    + Select Operator [SEL_4]
    + outputColumnNames:["_col1","_col2","_col5"]
                                       Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
    - Select Operator [SEL_2]
    - | outputColumnNames:["_col1","_col2","_col5"]
    - | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
    - |<-Map 1 [SIMPLE_EDGE]
    - Reduce Output Operator [RS_1]
    - key expressions:p_mfgr (type: string), p_name (type: string)
    - Map-reduce partition columns:p_mfgr (type: string)
    - sort order:++
    - Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
    - value expressions:p_size (type: int)
    - TableScan [TS_0]
    - alias:part
    + PTF Operator [PTF_3]
    + Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
    + Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
    + Select Operator [SEL_2]
    + | outputColumnNames:["_col1","_col2","_col5"]
    + | Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
    + |<-Map 1 [SIMPLE_EDGE]
    + Reduce Output Operator [RS_1]
    + key expressions:p_mfgr (type: string), p_name (type: string)
    + Map-reduce partition columns:p_mfgr (type: string)
    + sort order:++
                                                Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
    + value expressions:p_size (type: int)
    + TableScan [TS_0]
    + alias:part
    + Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE

      PREHOOK: query: explain
      select abc.*
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11536 follow-up: it seems the earlier commit had strange line endings


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c0690a69
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c0690a69
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c0690a69

    Branch: refs/heads/beeline-cli
    Commit: c0690a69dcd4976435f8b33084f9b9b3c0c16889
    Parents: b22e54e
    Author: Alan Gates <gates@hortonworks.com>
    Authored: Tue Sep 1 09:57:24 2015 -0700
    Committer: Alan Gates <gates@hortonworks.com>
    Committed: Tue Sep 1 09:57:24 2015 -0700

    ----------------------------------------------------------------------
      .../src/test/queries/db/rowtype_attribute.sql | 42 ++++----
      hplsql/src/test/queries/db/type_attribute.sql | 14 +--
      .../local/create_procedure_no_params.sql | 36 +++----
      .../test/queries/offline/create_table_ora.sql | 104 +++++++++----------
      4 files changed, 98 insertions(+), 98 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/rowtype_attribute.sql
    ----------------------------------------------------------------------
    diff --git a/hplsql/src/test/queries/db/rowtype_attribute.sql b/hplsql/src/test/queries/db/rowtype_attribute.sql
    index 6a84b57..2fc65ce 100644
    --- a/hplsql/src/test/queries/db/rowtype_attribute.sql
    +++ b/hplsql/src/test/queries/db/rowtype_attribute.sql
    @@ -1,22 +1,22 @@
    -DECLARE
    - v1 default.src%ROWTYPE;
    - v2 src %ROWTYPE;
    - v3 src % ROWTYPE;
    - CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
    -BEGIN
    - SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
    - PRINT v1.key || v1.value;
    -
    - OPEN c1;
    - FETCH c1 INTO v2;
    - PRINT v2.key || v2.value;
    - CLOSE c1;
    -
    - FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
    - LOOP
    - PRINT rec.key || rec.value;
    - END LOOP;
    -
    - EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
    - PRINT v3.key || v3.value;
    +DECLARE
    + v1 default.src%ROWTYPE;
    + v2 src %ROWTYPE;
    + v3 src % ROWTYPE;
    + CURSOR c1 IS SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1;
    +BEGIN
    + SELECT 'A' AS key, 'B' AS value INTO v1 FROM src LIMIT 1;
    + PRINT v1.key || v1.value;
    +
    + OPEN c1;
    + FETCH c1 INTO v2;
    + PRINT v2.key || v2.value;
    + CLOSE c1;
    +
    + FOR rec IN (SELECT 'A' AS key, 'B' AS value FROM src LIMIT 1)
    + LOOP
    + PRINT rec.key || rec.value;
    + END LOOP;
    +
    + EXECUTE IMMEDIATE 'SELECT ''A'' AS key, ''B'' AS value FROM src LIMIT 1' INTO v3;
    + PRINT v3.key || v3.value;
      END
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/db/type_attribute.sql
    ----------------------------------------------------------------------
    diff --git a/hplsql/src/test/queries/db/type_attribute.sql b/hplsql/src/test/queries/db/type_attribute.sql
    index 2d93bfd..059c291 100644
    --- a/hplsql/src/test/queries/db/type_attribute.sql
    +++ b/hplsql/src/test/queries/db/type_attribute.sql
    @@ -1,8 +1,8 @@
    -DECLARE
    - v1 default.src.key%TYPE;
    - v2 src.Key %TYPE;
    - v3 src.key3 % TYPE;
    -BEGIN
    - SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
    - PRINT v1 || v2 || v3;
    +DECLARE
    + v1 default.src.key%TYPE;
    + v2 src.Key %TYPE;
    + v3 src.key3 % TYPE;
    +BEGIN
    + SELECT 'A', 'B', 1 INTO v1, v2, v3 FROM src LIMIT 1;
    + PRINT v1 || v2 || v3;
      END
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/local/create_procedure_no_params.sql
    ----------------------------------------------------------------------
    diff --git a/hplsql/src/test/queries/local/create_procedure_no_params.sql b/hplsql/src/test/queries/local/create_procedure_no_params.sql
    index 535ba98..c4c2992 100644
    --- a/hplsql/src/test/queries/local/create_procedure_no_params.sql
    +++ b/hplsql/src/test/queries/local/create_procedure_no_params.sql
    @@ -1,19 +1,19 @@
    -create procedure sp1
    -begin
    - print 'a';
    -end;
    -
    -create procedure sp2()
    -begin
    - print 'b';
    -end;
    -
    -call sp1;
    -call sp1();
    -sp1;
    -sp1();
    -
    -call sp2;
    -call sp2();
    -sp2;
    +create procedure sp1
    +begin
    + print 'a';
    +end;
    +
    +create procedure sp2()
    +begin
    + print 'b';
    +end;
    +
    +call sp1;
    +call sp1();
    +sp1;
    +sp1();
    +
    +call sp2;
    +call sp2();
    +sp2;
      sp2();
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c0690a69/hplsql/src/test/queries/offline/create_table_ora.sql
    ----------------------------------------------------------------------
    diff --git a/hplsql/src/test/queries/offline/create_table_ora.sql b/hplsql/src/test/queries/offline/create_table_ora.sql
    index bb1d9c7..40a543a 100644
    --- a/hplsql/src/test/queries/offline/create_table_ora.sql
    +++ b/hplsql/src/test/queries/offline/create_table_ora.sql
    @@ -1,53 +1,53 @@
    -CREATE TABLE ora_t1 (
    - n1 NUMBER(3,0),
    - v1 VARCHAR2(10)
    -);
    -
    -CREATE TABLE "USER"."EMP"
    - ( "EMPNO" NUMBER(4,0),
    - "ENAME" VARCHAR2(10 BYTE),
    - "JOB" VARCHAR2(9 BYTE),
    - "MGR" NUMBER(4,0),
    - "HIREDATE" DATE,
    - "SAL" NUMBER(7,2),
    - "COMM" NUMBER(7,2),
    - "DEPTNO" NUMBER(2,0)
    - ) SEGMENT CREATION IMMEDIATE
    - PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
    - STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
    - PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
    - TABLESPACE "USERS" ;
    -
    -CREATE TABLE language (
    - id NUMBER(7) NOT NULL PRIMARY KEY,
    - cd CHAR(2) NOT NULL,
    - description VARCHAR2(50)
    -);
    -CREATE TABLE author (
    - id NUMBER(7) NOT NULL PRIMARY KEY,
    - first_name VARCHAR2(50),
    - last_name VARCHAR2(50) NOT NULL,
    - date_of_birth DATE,
    - year_of_birth NUMBER(7),
    - distinguished NUMBER(1)
    -);
    -CREATE TABLE book (
    - id NUMBER(7) NOT NULL PRIMARY KEY,
    - author_id NUMBER(7) NOT NULL,
    - title VARCHAR2(400) NOT NULL,
    - published_in NUMBER(7) NOT NULL,
    - language_id NUMBER(7) NOT NULL,
    - CONSTRAINT fk_book_author FOREIGN KEY (author_id) REFERENCES author(id),
    - CONSTRAINT fk_book_language FOREIGN KEY (language_id) REFERENCES language(id)
    -);
    -CREATE TABLE book_store (
    - name VARCHAR2(400) NOT NULL UNIQUE
    -);
    -CREATE TABLE book_to_book_store (
    - name VARCHAR2(400) NOT NULL,
    - book_id INTEGER NOT NULL,
    - stock INTEGER,
    - PRIMARY KEY(name, book_id),
    - CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name) REFERENCES book_store (name) ON DELETE CASCADE,
    - CONSTRAINT fk_b2bs_book FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
    +CREATE TABLE ora_t1 (
    + n1 NUMBER(3,0),
    + v1 VARCHAR2(10)
    +);
    +
    +CREATE TABLE "USER"."EMP"
    + ( "EMPNO" NUMBER(4,0),
    + "ENAME" VARCHAR2(10 BYTE),
    + "JOB" VARCHAR2(9 BYTE),
    + "MGR" NUMBER(4,0),
    + "HIREDATE" DATE,
    + "SAL" NUMBER(7,2),
    + "COMM" NUMBER(7,2),
    + "DEPTNO" NUMBER(2,0)
    + ) SEGMENT CREATION IMMEDIATE
    + PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255 NOCOMPRESS LOGGING
    + STORAGE(INITIAL 65536 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645
    + PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL DEFAULT FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT)
    + TABLESPACE "USERS" ;
    +
    +CREATE TABLE language (
    + id NUMBER(7) NOT NULL PRIMARY KEY,
    + cd CHAR(2) NOT NULL,
    + description VARCHAR2(50)
    +);
    +CREATE TABLE author (
    + id NUMBER(7) NOT NULL PRIMARY KEY,
    + first_name VARCHAR2(50),
    + last_name VARCHAR2(50) NOT NULL,
    + date_of_birth DATE,
    + year_of_birth NUMBER(7),
    + distinguished NUMBER(1)
    +);
    +CREATE TABLE book (
    + id NUMBER(7) NOT NULL PRIMARY KEY,
    + author_id NUMBER(7) NOT NULL,
    + title VARCHAR2(400) NOT NULL,
    + published_in NUMBER(7) NOT NULL,
    + language_id NUMBER(7) NOT NULL,
    + CONSTRAINT fk_book_author FOREIGN KEY (author_id) REFERENCES author(id),
    + CONSTRAINT fk_book_language FOREIGN KEY (language_id) REFERENCES language(id)
    +);
    +CREATE TABLE book_store (
    + name VARCHAR2(400) NOT NULL UNIQUE
    +);
    +CREATE TABLE book_to_book_store (
    + name VARCHAR2(400) NOT NULL,
    + book_id INTEGER NOT NULL,
    + stock INTEGER,
    + PRIMARY KEY(name, book_id),
    + CONSTRAINT fk_b2bs_book_store FOREIGN KEY (name) REFERENCES book_store (name) ON DELETE CASCADE,
    + CONSTRAINT fk_b2bs_book FOREIGN KEY (book_id) REFERENCES book (id) ON DELETE CASCADE
      );
    \ No newline at end of file
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e

    Branch: refs/heads/beeline-cli
    Commit: af91308e5b6573ea6dc793912bcc628a5a40c000
    Parents: 22fa921
    Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Authored: Sat Aug 29 11:40:03 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Sat Aug 29 11:42:59 2015 +0200

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 ++++++++++++++------
      .../hadoop/hive/ql/lib/ForwardWalker.java | 33 ++++----
      .../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
      .../hive/ql/optimizer/ConstantPropagate.java | 10 +--
      4 files changed, 79 insertions(+), 50 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    index 583c113..07d2734 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    @@ -22,7 +22,9 @@ import java.util.ArrayList;
      import java.util.Collection;
      import java.util.HashMap;
      import java.util.IdentityHashMap;
    +import java.util.LinkedList;
      import java.util.List;
    +import java.util.Queue;
      import java.util.Set;
      import java.util.Stack;

    @@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
       */
      public class DefaultGraphWalker implements GraphWalker {

    - protected Stack<Node> opStack;
    + /**
    + * opStack keeps the nodes that have been visited, but have not been
    + * dispatched yet
    + */
    + protected final Stack<Node> opStack;
    + /**
+ * opQueue keeps the nodes in the order that they were dispatched.
    + * Then it is used to go through the processed nodes and store
    + * the results that the dispatcher has produced (if any)
    + */
    + protected final Queue<Node> opQueue;
    + /**
    + * toWalk stores the starting nodes for the graph that needs to be
    + * traversed
    + */
        protected final List<Node> toWalk = new ArrayList<Node>();
        protected final IdentityHashMap<Node, Object> retMap = new IdentityHashMap<Node, Object>();
        protected final Dispatcher dispatcher;
    @@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker {
        public DefaultGraphWalker(Dispatcher disp) {
          dispatcher = disp;
          opStack = new Stack<Node>();
    - }
    -
    - /**
    - * @return the toWalk
    - */
    - public List<Node> getToWalk() {
    - return toWalk;
    + opQueue = new LinkedList<Node>();
        }

        /**
    @@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker {
          while (toWalk.size() > 0) {
            Node nd = toWalk.remove(0);
            walk(nd);
    + // Some walkers extending DefaultGraphWalker e.g. ForwardWalker
+ // do not use opQueue and rely solely on the toWalk structure,
    + // thus we store the results produced by the dispatcher here
+ // TODO: rewrite the logic of those walkers to use opQueue
            if (nodeOutput != null && getDispatchedList().contains(nd)) {
              nodeOutput.put(nd, retMap.get(nd));
            }
          }
    +
    + // Store the results produced by the dispatcher
    + while (!opQueue.isEmpty()) {
    + Node node = opQueue.poll();
    + if (nodeOutput != null && getDispatchedList().contains(node)) {
    + nodeOutput.put(node, retMap.get(node));
    + }
    + }
        }

        /**
    @@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker {
         * current operator in the graph
         * @throws SemanticException
         */
    - public void walk(Node nd) throws SemanticException {
    - if (opStack.empty() || nd != opStack.peek()) {
    - opStack.push(nd);
    - }
    + public void walk(Node nd) throws SemanticException {
    + // Push the node in the stack
    + opStack.push(nd);
    +
    + // While there are still nodes to dispatch...
    + while (!opStack.empty()) {
    + Node node = opStack.peek();

    - if ((nd.getChildren() == null)
    - || getDispatchedList().containsAll(nd.getChildren())) {
    - // all children are done or no need to walk the children
    - if (!getDispatchedList().contains(nd)) {
    - dispatch(nd, opStack);
    + if (node.getChildren() == null ||
    + getDispatchedList().containsAll(node.getChildren())) {
    + // Dispatch current node
    + if (!getDispatchedList().contains(node)) {
    + dispatch(node, opStack);
    + opQueue.add(node);
    + }
    + opStack.pop();
    + continue;
            }
    - opStack.pop();
    - return;
    - }
    - // add children, self to the front of the queue in that order
    - getToWalk().add(0, nd);
    - getToWalk().removeAll(nd.getChildren());
    - getToWalk().addAll(0, nd.getChildren());
    +
    + // Add a single child and restart the loop
    + for (Node childNode : node.getChildren()) {
    + if (!getDispatchedList().contains(childNode)) {
    + opStack.push(childNode);
    + break;
    + }
    + }
    + } // end while
        }
    +
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
    index a2db3b5..67b4700 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
    @@ -19,20 +19,17 @@
      package org.apache.hadoop.hive.ql.lib;

      import org.apache.hadoop.hive.ql.exec.Operator;
    -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
    -import org.apache.hadoop.hive.ql.lib.Dispatcher;
    -import org.apache.hadoop.hive.ql.lib.Node;
      import org.apache.hadoop.hive.ql.parse.SemanticException;
      import org.apache.hadoop.hive.ql.plan.OperatorDesc;

      public class ForwardWalker extends DefaultGraphWalker {

        /**
    -* Constructor.
    -*
    -* @param disp
    -* dispatcher to call for each op encountered
    -*/
    + * Constructor.
    + *
    + * @param disp
    + * dispatcher to call for each op encountered
    + */
        public ForwardWalker(Dispatcher disp) {
          super(disp);
        }
    @@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker {
        @SuppressWarnings("unchecked")
        protected void addAllParents(Node nd) {
          Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
    - getToWalk().removeAll(op.getParentOperators());
    - getToWalk().addAll(0, op.getParentOperators());
    + toWalk.removeAll(op.getParentOperators());
    + toWalk.addAll(0, op.getParentOperators());
        }

        /**
    -* walk the current operator and its descendants.
    -*
    -* @param nd
    -* current operator in the graph
    -* @throws SemanticException
    -*/
    + * walk the current operator and its descendants.
    + *
    + * @param nd
    + * current operator in the graph
    + * @throws SemanticException
    + */
        @Override
        public void walk(Node nd) throws SemanticException {
          if (opStack.empty() || nd != opStack.peek()) {
    @@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker {
          if (allParentsDispatched(nd)) {
            // all children are done or no need to walk the children
            if (!getDispatchedList().contains(nd)) {
    - getToWalk().addAll(nd.getChildren());
    + toWalk.addAll(nd.getChildren());
              dispatch(nd, opStack);
            }
            opStack.pop();
            return;
          }
          // add children, self to the front of the queue in that order
    - getToWalk().add(0, nd);
    + toWalk.add(0, nd);
          addAllParents(nd);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
    index 9a45458..735b448 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
    @@ -174,10 +174,10 @@ public class ColumnPruner implements Transform {
              return;
            }
            // move all the children to the front of queue
    - getToWalk().removeAll(nd.getChildren());
    - getToWalk().addAll(0, nd.getChildren());
    + toWalk.removeAll(nd.getChildren());
    + toWalk.addAll(0, nd.getChildren());
            // add self to the end of the queue
    - getToWalk().add(nd);
    + toWalk.add(nd);
            opStack.pop();
          }
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
    index dd53ced..b6f1f27 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
    @@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform {
              dispatch(nd, opStack);
              opStack.pop();
            } else {
    - getToWalk().removeAll(parents);
    - getToWalk().add(0, nd);
    - getToWalk().addAll(0, parents);
    + toWalk.removeAll(parents);
    + toWalk.add(0, nd);
    + toWalk.addAll(0, parents);
              return;
            }

            // move all the children to the front of queue
            List<? extends Node> children = nd.getChildren();
            if (children != null) {
    - getToWalk().removeAll(children);
    - getToWalk().addAll(children);
    + toWalk.removeAll(children);
    + toWalk.addAll(children);
            }
          }
        }
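
    The walk rewrite above replaces the recursive descent, which repeatedly called removeAll on the shared toWalk list (a linear scan per call), with an explicit stack: a node is dispatched only once all of its children have been dispatched, and dispatched nodes are recorded in opQueue so their results can be collected afterwards. The following is a minimal standalone sketch of that traversal pattern; the Node interface, the dispatch stub, and the use of retMap as the "already dispatched" marker are simplifications for illustration, not the Hive classes.

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.IdentityHashMap;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Map;
    import java.util.Queue;

    // Hypothetical stand-ins: this Node is not org.apache.hadoop.hive.ql.lib.Node,
    // and dispatch() below is a placeholder for the Dispatcher call.
    public class WalkSketch {

      interface Node {
        List<Node> getChildren();
      }

      private final Map<Node, Object> retMap = new IdentityHashMap<Node, Object>();
      private final Queue<Node> opQueue = new LinkedList<Node>();

      private Object dispatch(Node nd) {
        return nd.toString(); // placeholder for the real dispatcher result
      }

      /** Post-order walk with an explicit stack; no removeAll over a work list. */
      public void walk(Node start) {
        Deque<Node> opStack = new ArrayDeque<Node>();
        opStack.push(start);
        while (!opStack.isEmpty()) {
          Node node = opStack.peek();
          List<Node> children = node.getChildren();
          if (children == null || allDispatched(children)) {
            if (!retMap.containsKey(node)) {   // dispatch each node exactly once
              retMap.put(node, dispatch(node));
              opQueue.add(node);               // remember dispatch order for result collection
            }
            opStack.pop();
            continue;
          }
          for (Node child : children) {        // push one undispatched child and retry
            if (!retMap.containsKey(child)) {
              opStack.push(child);
              break;
            }
          }
        }
      }

      private boolean allDispatched(List<Node> nodes) {
        for (Node n : nodes) {
          if (!retMap.containsKey(n)) {
            return false;
          }
        }
        return true;
      }
    }

    Each node is dispatched exactly once and the work-list scans of the old implementation disappear, which is the cost the patch is avoiding.
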
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11623: CBO: Calcite Operator To Hive Operator (Calcite Return Path): fix the tableAlias for ReduceSink operator (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e85bbf2
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e85bbf2
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e85bbf2

    Branch: refs/heads/beeline-cli
    Commit: 9e85bbf2780510edda79c247248da57619530577
    Parents: fb152e4
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Thu Aug 27 11:26:25 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Thu Aug 27 11:26:46 2015 -0700

    ----------------------------------------------------------------------
      .../calcite/translator/HiveOpConverter.java | 31 ++++++++++++++++----
      1 file changed, 26 insertions(+), 5 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/9e85bbf2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    index 4db9863..1931880 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
    @@ -686,13 +686,34 @@ public class HiveOpConverter {
            int numReducers, Operation acidOperation, boolean strictMode,
            List<String> keepColNames) throws SemanticException {
          // 1. Generate RS operator
    - if (input.getSchema().getTableNames().size() != 1) {
     + // 1.1 Prune the tableNames, only counting the tableNames that are not empty strings,
     + // as an empty string in a table alias is only allowed for virtual columns.
    + String tableAlias = null;
    + Set<String> tableNames = input.getSchema().getTableNames();
    + for (String tableName : tableNames) {
    + if (tableName != null) {
    + if (tableName.length() == 0) {
    + if (tableAlias == null) {
    + tableAlias = tableName;
    + }
    + } else {
    + if (tableAlias == null || tableAlias.length() == 0) {
    + tableAlias = tableName;
    + } else {
    + if (!tableName.equals(tableAlias)) {
    + throw new SemanticException(
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
    + }
    + }
    + }
    + }
    + }
    + if (tableAlias == null) {
            throw new SemanticException(
    - "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one SelectOp but there is "
    - + input.getSchema().getTableNames().size());
    + "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
          }
    - ReduceSinkOperator rsOp = genReduceSink(input, input.getSchema().getTableNames().iterator()
    - .next(), keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);
    + // 1.2 Now generate RS operator
    + ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, strictMode);

          // 2. Generate backtrack Select operator
          Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames,
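
    The new code in genReduceSinkAndBacktrackSelect tolerates the empty-string alias that virtual columns carry: a non-empty alias wins over an empty one, two different non-empty aliases are still an error, and no alias at all is an error. Below is a small sketch of that selection rule on its own, using plain strings and IllegalStateException in place of the operator schema and SemanticException; it is illustrative only, not the Hive implementation.

    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.Set;

    // Illustrative sketch of the alias-pruning rule above; not the Hive code path.
    public class TableAliasSketch {

      static String pickTableAlias(Set<String> tableNames) {
        String tableAlias = null;
        for (String tableName : tableNames) {
          if (tableName == null) {
            continue;
          }
          if (tableName.length() == 0) {
            // the empty alias is only used for virtual columns; keep it only as a fallback
            if (tableAlias == null) {
              tableAlias = tableName;
            }
          } else if (tableAlias == null || tableAlias.length() == 0) {
            tableAlias = tableName; // first real alias wins
          } else if (!tableName.equals(tableAlias)) {
            throw new IllegalStateException("expecting only one table alias but there is more than one");
          }
        }
        if (tableAlias == null) {
          throw new IllegalStateException("expecting only one table alias but there is none");
        }
        return tableAlias;
      }

      public static void main(String[] args) {
        // "" comes from virtual columns; "t1" is the real alias that should be chosen
        System.out.println(pickTableAlias(new LinkedHashSet<String>(Arrays.asList("", "t1"))));
      }
    }
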
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11595 : refactor ORC footer reading to make it usable from outside (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22fa9216
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22fa9216
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22fa9216

    Branch: refs/heads/beeline-cli
    Commit: 22fa9216d4e32d7681d3c1be8cbedc8c7999e56d
    Parents: 97bf32a
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Fri Aug 28 18:23:05 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Fri Aug 28 18:23:05 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/io/orc/Reader.java | 6 +
      .../hadoop/hive/ql/io/orc/ReaderImpl.java | 281 +++++++++++++------
      2 files changed, 204 insertions(+), 83 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
    index 7bddefc..187924d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
    @@ -22,6 +22,7 @@ import java.io.IOException;
      import java.nio.ByteBuffer;
      import java.util.List;

    +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    @@ -358,4 +359,9 @@ public interface Reader {
                          String[] neededColumns) throws IOException;

        MetadataReader metadata() throws IOException;
    +
    + /** Gets serialized file metadata read from disk for the purposes of caching, etc. */
    + ByteBuffer getSerializedFileFooter();
    +
    + Footer getFooter();
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/22fa9216/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    index c990d85..ab539c4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    @@ -35,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.hive.common.DiskRange;
      import org.apache.hadoop.hive.ql.io.FileFormatException;
    +import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
    +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer;
      import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
      import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
      import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
    @@ -74,6 +76,9 @@ public class ReaderImpl implements Reader {
        // will help avoid cpu cycles spend in deserializing at cost of increased
        // memory footprint.
        private final ByteBuffer footerByteBuffer;
    + // Same for metastore cache - maintains the same background buffer, but includes postscript.
    + // This will only be set if the file footer/metadata was read from disk.
    + private final ByteBuffer footerMetaAndPsBuffer;

        static class StripeInformationImpl
            implements StripeInformation {
    @@ -166,11 +171,7 @@ public class ReaderImpl implements Reader {

        @Override
        public List<StripeInformation> getStripes() {
    - List<StripeInformation> result = new ArrayList<StripeInformation>();
    - for(OrcProto.StripeInformation info: footer.getStripesList()) {
    - result.add(new StripeInformationImpl(info));
    - }
    - return result;
    + return convertProtoStripesToStripes(footer.getStripesList());
        }

        @Override
    @@ -274,7 +275,7 @@ public class ReaderImpl implements Reader {
         * Check to see if this ORC file is from a future version and if so,
         * warn the user that we may not be able to read all of the column encodings.
         * @param log the logger to write any error message to
    - * @param path the filename for error messages
    + * @param path the data source path for error messages
         * @param version the version of hive that wrote the file.
         */
        static void checkOrcVersion(Log log, Path path, List<Integer> version) {
    @@ -287,8 +288,7 @@ public class ReaderImpl implements Reader {
            if (major > OrcFile.Version.CURRENT.getMajor() ||
                (major == OrcFile.Version.CURRENT.getMajor() &&
                 minor > OrcFile.Version.CURRENT.getMinor())) {
    - log.warn("ORC file " + path +
    - " was written by a future Hive version " +
    + log.warn(path + " was written by a future Hive version " +
                       versionString(version) +
                       ". This file may not be readable by this version of Hive.");
            }
    @@ -313,9 +313,11 @@ public class ReaderImpl implements Reader {
          FileMetaInfo footerMetaData;
          if (options.getFileMetaInfo() != null) {
            footerMetaData = options.getFileMetaInfo();
    + this.footerMetaAndPsBuffer = null;
          } else {
            footerMetaData = extractMetaInfoFromFooter(fs, path,
                options.getMaxLength());
    + this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer;
          }
          MetaInfoObjExtractor rInfo =
              new MetaInfoObjExtractor(footerMetaData.compressionType,
    @@ -349,6 +351,111 @@ public class ReaderImpl implements Reader {
          return OrcFile.WriterVersion.ORIGINAL;
        }

     + /** Extracts the necessary metadata from an externally stored buffer (fullFooterBuffer). */
    + public static FooterInfo extractMetaInfoFromFooter(
    + ByteBuffer bb, Path srcPath) throws IOException {
    + // Read the PostScript. Be very careful as some parts of this historically use bb position
    + // and some use absolute offsets that have to take position into account.
    + int baseOffset = bb.position();
    + int lastByteAbsPos = baseOffset + bb.remaining() - 1;
    + int psLen = bb.get(lastByteAbsPos) & 0xff;
    + int psAbsPos = lastByteAbsPos - psLen;
    + OrcProto.PostScript ps = extractPostScript(bb, srcPath, psLen, psAbsPos);
    + assert baseOffset == bb.position();
    +
    + // Extract PS information.
    + int footerSize = (int)ps.getFooterLength(), metadataSize = (int)ps.getMetadataLength(),
    + footerAbsPos = psAbsPos - footerSize, metadataAbsPos = footerAbsPos - metadataSize;
    + String compressionType = ps.getCompression().toString();
    + CompressionCodec codec = WriterImpl.createCodec(CompressionKind.valueOf(compressionType));
    + int bufferSize = (int)ps.getCompressionBlockSize();
    + bb.position(metadataAbsPos);
    + bb.mark();
    +
    + // Extract metadata and footer.
    + Metadata metadata = new Metadata(extractMetadata(
    + bb, metadataAbsPos, metadataSize, codec, bufferSize));
    + OrcProto.Footer footer = extractFooter(bb, footerAbsPos, footerSize, codec, bufferSize);
    + bb.position(metadataAbsPos);
    + bb.limit(psAbsPos);
    + // TODO: do we need footer buffer here? FileInfo/FileMetaInfo is a mess...
    + FileMetaInfo fmi = new FileMetaInfo(
    + compressionType, bufferSize, metadataSize, bb, extractWriterVersion(ps));
    + return new FooterInfo(metadata, footer, fmi);
    + }
    +
    + private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos,
    + int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
    + bb.position(footerAbsPos);
    + bb.limit(footerAbsPos + footerSize);
    + InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
    + new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
    + return OrcProto.Footer.parseFrom(instream);
    + }
    +
    + private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
    + int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
    + bb.position(metadataAbsPos);
    + bb.limit(metadataAbsPos + metadataSize);
    + InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
    + new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
    + CodedInputStream in = CodedInputStream.newInstance(instream);
    + int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
    + OrcProto.Metadata meta = null;
    + do {
    + try {
    + in.setSizeLimit(msgLimit);
    + meta = OrcProto.Metadata.parseFrom(in);
    + } catch (InvalidProtocolBufferException e) {
    + if (e.getMessage().contains("Protocol message was too large")) {
    + LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
    + " size of the coded input stream." );
    +
    + msgLimit = msgLimit << 1;
    + if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
    + LOG.error("Metadata section exceeds max protobuf message size of " +
    + PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
    + throw e;
    + }
    +
    + // we must have failed in the middle of reading instream and instream doesn't support
    + // resetting the stream
    + instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
    + new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
    + in = CodedInputStream.newInstance(instream);
    + } else {
    + throw e;
    + }
    + }
    + } while (meta == null);
    + return meta;
    + }
    +
    + private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
    + int psLen, int psAbsOffset) throws IOException {
    + // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here.
    + assert bb.hasArray();
    + CodedInputStream in = CodedInputStream.newInstance(
    + bb.array(), bb.arrayOffset() + psAbsOffset, psLen);
    + OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
    + checkOrcVersion(LOG, path, ps.getVersionList());
    +
    + // Check compression codec.
    + switch (ps.getCompression()) {
    + case NONE:
    + break;
    + case ZLIB:
    + break;
    + case SNAPPY:
    + break;
    + case LZO:
    + break;
    + default:
    + throw new IllegalArgumentException("Unknown compression");
    + }
    + return ps;
    + }
    +
        private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs,
                                                              Path path,
                                                              long maxFileLength
    @@ -367,44 +474,24 @@ public class ReaderImpl implements Reader {
          int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
          file.seek(size - readSize);
          ByteBuffer buffer = ByteBuffer.allocate(readSize);
    - file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
    - buffer.remaining());
    + assert buffer.position() == 0;
    + file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
    + buffer.position(0);

          //read the PostScript
          //get length of PostScript
          int psLen = buffer.get(readSize - 1) & 0xff;
          ensureOrcFooter(file, path, psLen, buffer);
          int psOffset = readSize - 1 - psLen;
    - CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
    - buffer.arrayOffset() + psOffset, psLen);
    - OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
    -
    - checkOrcVersion(LOG, path, ps.getVersionList());
    + OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset);

          int footerSize = (int) ps.getFooterLength();
          int metadataSize = (int) ps.getMetadataLength();
    - OrcFile.WriterVersion writerVersion;
    - if (ps.hasWriterVersion()) {
    - writerVersion = getWriterVersion(ps.getWriterVersion());
    - } else {
    - writerVersion = OrcFile.WriterVersion.ORIGINAL;
    - }
    + OrcFile.WriterVersion writerVersion = extractWriterVersion(ps);

    - //check compression codec
    - switch (ps.getCompression()) {
    - case NONE:
    - break;
    - case ZLIB:
    - break;
    - case SNAPPY:
    - break;
    - case LZO:
    - break;
    - default:
    - throw new IllegalArgumentException("Unknown compression");
    - }

          //check if extra bytes need to be read
    + ByteBuffer fullFooterBuffer = null;
          int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
          if (extra > 0) {
            //more bytes need to be read, seek back to the right place and read extra bytes
    @@ -417,10 +504,12 @@ public class ReaderImpl implements Reader {
            extraBuf.put(buffer);
            buffer = extraBuf;
            buffer.position(0);
    + fullFooterBuffer = buffer.slice();
            buffer.limit(footerSize + metadataSize);
          } else {
            //footer is already in the bytes in buffer, just adjust position, length
            buffer.position(psOffset - footerSize - metadataSize);
    + fullFooterBuffer = buffer.slice();
            buffer.limit(psOffset);
          }

    @@ -435,11 +524,24 @@ public class ReaderImpl implements Reader {
              (int) ps.getMetadataLength(),
              buffer,
              ps.getVersionList(),
    - writerVersion
    + writerVersion,
    + fullFooterBuffer
              );
        }

    + private static OrcFile.WriterVersion extractWriterVersion(OrcProto.PostScript ps) {
    + return (ps.hasWriterVersion()
    + ? getWriterVersion(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
    + }

    + private static List<StripeInformation> convertProtoStripesToStripes(
    + List<OrcProto.StripeInformation> stripes) {
    + List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
    + for (OrcProto.StripeInformation info : stripes) {
    + result.add(new StripeInformationImpl(info));
    + }
    + return result;
    + }

        /**
         * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl
    @@ -467,46 +569,10 @@ public class ReaderImpl implements Reader {

            int position = footerBuffer.position();
            int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize;
    - footerBuffer.limit(position + metadataSize);
    -
    - InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
    - new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
    - CodedInputStream in = CodedInputStream.newInstance(instream);
    - int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
    - OrcProto.Metadata meta = null;
    - do {
    - try {
    - in.setSizeLimit(msgLimit);
    - meta = OrcProto.Metadata.parseFrom(in);
    - } catch (InvalidProtocolBufferException e) {
    - if (e.getMessage().contains("Protocol message was too large")) {
    - LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
    - " size of the coded input stream." );
    -
    - msgLimit = msgLimit << 1;
    - if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
    - LOG.error("Metadata section exceeds max protobuf message size of " +
    - PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
    - throw e;
    - }
    -
    - // we must have failed in the middle of reading instream and instream doesn't support
    - // resetting the stream
    - instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
    - new BufferChunk(footerBuffer, 0)), metadataSize, codec, bufferSize);
    - in = CodedInputStream.newInstance(instream);
    - } else {
    - throw e;
    - }
    - }
    - } while (meta == null);
    - this.metadata = meta;

    - footerBuffer.position(position + metadataSize);
    - footerBuffer.limit(position + metadataSize + footerBufferSize);
    - instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
    - new BufferChunk(footerBuffer, 0)), footerBufferSize, codec, bufferSize);
    - this.footer = OrcProto.Footer.parseFrom(instream);
    + this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize);
    + this.footer = extractFooter(
    + footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize);

            footerBuffer.position(position);
            this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
    @@ -518,7 +584,8 @@ public class ReaderImpl implements Reader {
         * that is useful for Reader implementation
         *
         */
    - static class FileMetaInfo{
    + static class FileMetaInfo {
    + private ByteBuffer footerMetaAndPsBuffer;
          final String compressionType;
          final int bufferSize;
          final int metadataSize;
    @@ -526,30 +593,68 @@ public class ReaderImpl implements Reader {
          final List<Integer> versionList;
          final OrcFile.WriterVersion writerVersion;

    + /** Ctor used when reading splits - no version list or full footer buffer. */
          FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
              ByteBuffer footerBuffer, OrcFile.WriterVersion writerVersion) {
            this(compressionType, bufferSize, metadataSize, footerBuffer, null,
    - writerVersion);
    + writerVersion, null);
          }

    - FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
    - ByteBuffer footerBuffer, List<Integer> versionList,
    - OrcFile.WriterVersion writerVersion){
    + /** Ctor used when creating file info during init and when getting a new one. */
    + public FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
    + ByteBuffer footerBuffer, List<Integer> versionList, WriterVersion writerVersion,
    + ByteBuffer fullFooterBuffer) {
            this.compressionType = compressionType;
            this.bufferSize = bufferSize;
            this.metadataSize = metadataSize;
            this.footerBuffer = footerBuffer;
            this.versionList = versionList;
            this.writerVersion = writerVersion;
    + this.footerMetaAndPsBuffer = fullFooterBuffer;
          }
        }

    - public FileMetaInfo getFileMetaInfo(){
    + public FileMetaInfo getFileMetaInfo() {
          return new FileMetaInfo(compressionKind.toString(), bufferSize,
    - metadataSize, footerByteBuffer, versionList, writerVersion);
    + metadataSize, footerByteBuffer, versionList, writerVersion, footerMetaAndPsBuffer);
        }

    + /** Same as FileMetaInfo, but with extra fields. FileMetaInfo is serialized for splits
    + * and so we don't just add fields to it, it's already messy and confusing. */
    + public static final class FooterInfo {
    + private final OrcProto.Footer footer;
    + private final Metadata metadata;
    + private final List<StripeInformation> stripes;
    + private final FileMetaInfo fileMetaInfo;

    + private FooterInfo(Metadata metadata, OrcProto.Footer footer, FileMetaInfo fileMetaInfo) {
    + this.metadata = metadata;
    + this.footer = footer;
    + this.fileMetaInfo = fileMetaInfo;
    + this.stripes = convertProtoStripesToStripes(footer.getStripesList());
    + }
    +
    + public OrcProto.Footer getFooter() {
    + return footer;
    + }
    +
    + public Metadata getMetadata() {
    + return metadata;
    + }
    +
    + public FileMetaInfo getFileMetaInfo() {
    + return fileMetaInfo;
    + }
    +
    + public List<StripeInformation> getStripes() {
    + return stripes;
    + }
    + }
    +
    + @Override
    + public ByteBuffer getSerializedFileFooter() {
    + return footerMetaAndPsBuffer;
    + }

        @Override
        public RecordReader rows() throws IOException {
    @@ -609,14 +714,19 @@ public class ReaderImpl implements Reader {

        @Override
        public long getRawDataSizeFromColIndices(List<Integer> colIndices) {
    + return getRawDataSizeFromColIndices(colIndices, footer);
    + }
    +
    + public static long getRawDataSizeFromColIndices(
    + List<Integer> colIndices, OrcProto.Footer footer) {
          long result = 0;
          for (int colIdx : colIndices) {
    - result += getRawDataSizeOfColumn(colIdx);
    + result += getRawDataSizeOfColumn(colIdx, footer);
          }
          return result;
        }

    - private long getRawDataSizeOfColumn(int colIdx) {
    + private static long getRawDataSizeOfColumn(int colIdx, OrcProto.Footer footer) {
          OrcProto.ColumnStatistics colStat = footer.getStatistics(colIdx);
          long numVals = colStat.getNumberOfValues();
          Type type = footer.getTypes(colIdx);
    @@ -738,4 +848,9 @@ public class ReaderImpl implements Reader {
        public MetadataReader metadata() throws IOException {
          return new MetadataReader(fileSystem, path, codec, bufferSize, footer.getTypesCount());
        }
    +
    + @Override
    + public Footer getFooter() {
    + return footer;
    + }
      }
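
    The new extractMetaInfoFromFooter(ByteBuffer, Path) overload works off the ORC tail layout: the very last byte of the buffer is the PostScript length, the PostScript sits immediately before it, and the footer and metadata sections precede the PostScript by the lengths recorded inside it. The toy sketch below only reproduces that offset arithmetic over a byte buffer with invented section sizes; it does not parse the actual OrcProto messages, which is where the real sizes come from.

    import java.nio.ByteBuffer;

    // Toy illustration of the ORC tail offset arithmetic; the section sizes are invented
    // here, whereas the real reader takes them from the parsed PostScript.
    public class OrcTailLayoutSketch {

      public static void main(String[] args) {
        int metadataSize = 40, footerSize = 60, psLen = 20;

        // Fake "tail" buffer: [metadata][footer][postscript][psLen byte]
        ByteBuffer bb = ByteBuffer.allocate(metadataSize + footerSize + psLen + 1);
        bb.put(bb.capacity() - 1, (byte) psLen);

        int baseOffset = bb.position();
        int lastByteAbsPos = baseOffset + bb.remaining() - 1;
        int readPsLen = bb.get(lastByteAbsPos) & 0xff;    // last byte = PostScript length
        int psAbsPos = lastByteAbsPos - readPsLen;        // PostScript starts here
        int footerAbsPos = psAbsPos - footerSize;         // footer immediately precedes the PostScript
        int metadataAbsPos = footerAbsPos - metadataSize; // metadata immediately precedes the footer

        System.out.println("postscript at " + psAbsPos
            + ", footer at " + footerAbsPos
            + ", metadata at " + metadataAbsPos);
      }
    }
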
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11573: PointLookupOptimizer can be pessimistic at a low nDV (Gopal V, reviewed by Jesus Camacho Rodriguez)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b247cac4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b247cac4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b247cac4

    Branch: refs/heads/beeline-cli
    Commit: b247cac4fc3814e422d4f5d5aad96a1c6e385a7b
    Parents: 037fb02
    Author: Gopal V <gopalv@apache.org>
    Authored: Thu Aug 27 09:50:08 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Aug 27 09:50:08 2015 +0200

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
      .../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
      .../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
      .../queries/clientpositive/flatten_and_or.q | 4 +-
      .../test/queries/clientpositive/pointlookup.q | 59 +
      .../test/queries/clientpositive/pointlookup2.q | 51 +
      .../alter_partition_coltype.q.out | 12 +-
      .../clientpositive/annotate_stats_filter.q.out | 8 +-
      .../results/clientpositive/flatten_and_or.q.out | 8 +-
      ql/src/test/results/clientpositive/pcr.q.out | 12 +-
      .../results/clientpositive/pointlookup.q.out | 198 +++
      .../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
      .../results/clientpositive/ppd_transform.q.out | 12 +-
      .../test/results/clientpositive/spark/pcr.q.out | 12 +-
      .../clientpositive/spark/ppd_transform.q.out | 12 +-
      .../clientpositive/spark/vectorized_case.q.out | 2 +-
      .../clientpositive/tez/explainuser_1.q.out | 2 +-
      .../clientpositive/tez/vectorized_case.q.out | 2 +-
      .../clientpositive/vectorized_case.q.out | 9 +-
      19 files changed, 2118 insertions(+), 55 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index 8706a2d..8a00079 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -1190,6 +1190,10 @@ public class HiveConf extends Configuration {
              "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."),
          HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
               "Whether to transform OR clauses in Filter operators into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
    + "Minimum number of OR clauses needed to transform into IN clauses"),
    + HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
    + "Extract partial expressions when optimizing point lookup IN clauses"),
          // Constant propagation optimizer
          HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
          HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    index 14f362f..439f616 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    @@ -68,6 +68,18 @@ public class Optimizer {

          // Add the transformation that computes the lineage information.
          transformations.add(new Generator());
    +
    + // Try to transform OR predicates in Filter into simpler IN clauses first
    + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    + final int min = HiveConf.getIntVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
    + final boolean extract = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
    + final boolean testMode = HiveConf.getBoolVar(hiveConf,
    + HiveConf.ConfVars.HIVE_IN_TEST);
    + transformations.add(new PointLookupOptimizer(min, extract, testMode));
    + }
    +
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PredicateTransitivePropagate());
            if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
    @@ -82,11 +94,6 @@ public class Optimizer {
              transformations.add(new ConstantPropagate());
          }

    - // Try to transform OR predicates in Filter into IN clauses.
    - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
    - transformations.add(new PointLookupOptimizer());
    - }
    -
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
            transformations.add(new PartitionPruner());
            transformations.add(new PartitionConditionRemover());

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    index 6a8acec..d83636d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
    @@ -18,10 +18,14 @@
      package org.apache.hadoop.hive.ql.optimizer;

      import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.Comparator;
      import java.util.HashMap;
    +import java.util.HashSet;
      import java.util.LinkedHashMap;
      import java.util.List;
      import java.util.Map;
    +import java.util.Set;
      import java.util.Stack;

      import org.apache.calcite.util.Pair;
    @@ -46,15 +50,18 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
      import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
      import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
      import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
    +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
      import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

      import com.google.common.collect.ArrayListMultimap;
    +import com.google.common.collect.ImmutableSortedSet;
      import com.google.common.collect.ListMultimap;

      /**
    @@ -71,7 +78,49 @@ public class PointLookupOptimizer implements Transform {
                GenericUDFIn.class.getAnnotation(Description.class).name();
        private static final String STRUCT_UDF =
                GenericUDFStruct.class.getAnnotation(Description.class).name();
    + private static final String AND_UDF =
    + GenericUDFOPAnd.class.getAnnotation(Description.class).name();
    +
    + // these are closure-bound for all the walkers in context
    + public final int minOrExpr;
    + public final boolean extract;
    + public final boolean testMode;
    +
    + /*
    + * Pass in configs and pre-create a parse context
    + */
    + public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
    + this.minOrExpr = min;
    + this.extract = extract;
    + this.testMode = testMode;
    + }
    +
    + // Hash Set iteration isn't ordered, but force string sorted order
    + // to get a consistent test run.
    + private Collection<ExprNodeDescEqualityWrapper> sortForTests(
    + Set<ExprNodeDescEqualityWrapper> valuesExpr) {
    + if (!testMode) {
    + // normal case - sorting is wasted for an IN()
    + return valuesExpr;
    + }
    + final Collection<ExprNodeDescEqualityWrapper> sortedValues;
    +
    + sortedValues = ImmutableSortedSet.copyOf(
    + new Comparator<ExprNodeDescEqualityWrapper>() {
    + @Override
    + public int compare(ExprNodeDescEqualityWrapper w1,
    + ExprNodeDescEqualityWrapper w2) {
    + // fail if you find nulls (this is a test-code section)
    + if (w1.equals(w2)) {
    + return 0;
    + }
    + return w1.getExprNodeDesc().getExprString()
    + .compareTo(w2.getExprNodeDesc().getExprString());
    + }
    + }, valuesExpr);

    + return sortedValues;
    + }

        @Override
        public ParseContext transform(ParseContext pctx) throws SemanticException {
    @@ -103,7 +152,9 @@ public class PointLookupOptimizer implements Transform {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Generated new predicate with IN clause: " + newPredicate);
              }
    - filterOp.getConf().setOrigPredicate(predicate);
    + if (!extract) {
    + filterOp.getConf().setOrigPredicate(predicate);
    + }
              filterOp.getConf().setPredicate(newPredicate);
            }

    @@ -140,8 +191,11 @@ public class PointLookupOptimizer implements Transform {
              return null;
            }

    - // 2. It is an OR operator
    + // 2. It is an OR operator with enough children
            List<ExprNodeDesc> children = fd.getChildren();
    + if (children.size() < minOrExpr) {
    + return null;
    + }
            ListMultimap<String,Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> columnConstantsMap =
                    ArrayListMultimap.create();
            boolean modeAnd = false;
    @@ -272,6 +326,50 @@ public class PointLookupOptimizer implements Transform {
            newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
                    FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);

    + if (extract && columns.size() > 1) {
    + final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size()+1);
    +
    + // extract pre-conditions for the tuple expressions
    + // (a,b) IN ((1,2),(2,3)) ->
    + // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
    +
    + for (String keyString : columnConstantsMap.keySet()) {
    + final Set<ExprNodeDescEqualityWrapper> valuesExpr =
    + new HashSet<ExprNodeDescEqualityWrapper>(children.size());
    + final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
    + columnConstantsMap.get(keyString);
    + for (int i = 0; i < children.size(); i++) {
    + Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
    + .get(i);
    + valuesExpr
    + .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
    + }
    + ExprNodeColumnDesc lookupCol = partial.get(0).left;
    + // generate a partial IN clause, if the column is a partition column
    + if (lookupCol.getIsPartitionColOrVirtualCol()
    + || valuesExpr.size() < children.size()) {
    + // optimize only nDV reductions
    + final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
    + inExpr.add(lookupCol);
    + for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
    + inExpr.add(value.getExprNodeDesc());
    + }
    + subExpr.add(new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
    + }
    + }
    + // loop complete, inspect the sub expressions generated
    + if (subExpr.size() > 0) {
    + // add the newPredicate to the end & produce an AND clause
    + subExpr.add(newPredicate);
    + newPredicate = new ExprNodeGenericFuncDesc(
    + TypeInfoFactory.booleanTypeInfo, FunctionRegistry
    + .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
    + }
    + // else, newPredicate is unmodified
    + }
    +
            return newPredicate;
          }
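
    With the new constructor, the rewrite only fires when the number of OR branches reaches hive.optimize.point.lookup.min (31 by default), and the extract flag additionally emits per-column IN pre-conditions when they reduce the number of distinct values. The toy sketch below shows just the threshold-guarded OR-to-IN rewrite shape on plain strings; it is not the ExprNodeDesc-based code path above.

    import java.util.Arrays;
    import java.util.List;

    // Toy sketch of the OR -> IN rewrite guarded by a minimum branch count.
    public class PointLookupSketch {

      static String rewrite(String column, List<String> constants, int minOrExpr) {
        if (constants.size() < minOrExpr) {
          return null; // too few branches: leave the original OR untouched
        }
        StringBuilder in = new StringBuilder(column).append(" IN (");
        for (int i = 0; i < constants.size(); i++) {
          if (i > 0) {
            in.append(", ");
          }
          in.append(constants.get(i));
        }
        return in.append(")").toString();
      }

      public static void main(String[] args) {
        List<String> values = Arrays.asList("'0'", "'1'", "'2'");
        System.out.println(rewrite("key", values, 2));  // rewritten: key IN ('0', '1', '2')
        System.out.println(rewrite("key", values, 31)); // null: below the default minimum
      }
    }
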


    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/flatten_and_or.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/flatten_and_or.q b/ql/src/test/queries/clientpositive/flatten_and_or.q
    index 6d65225..6c6e0f9 100644
    --- a/ql/src/test/queries/clientpositive/flatten_and_or.q
    +++ b/ql/src/test/queries/clientpositive/flatten_and_or.q
    @@ -1,3 +1,5 @@
    +set hive.optimize.point.lookup=false;
    +
      explain
      SELECT key
      FROM src
    @@ -14,4 +16,4 @@ WHERE
         AND value = '1') OR (key = '9'
         AND value = '1') OR (key = '10'
         AND value = '3'))
    -;
    \ No newline at end of file
    +;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
    new file mode 100644
    index 0000000..1aef2ef
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup.q
    @@ -0,0 +1,59 @@
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +
    +set hive.optimize.point.lookup.min=3;
    +set hive.optimize.point.lookup.extract=false;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;
    +
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/queries/clientpositive/pointlookup2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
    new file mode 100644
    index 0000000..31bebbb
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/pointlookup2.q
    @@ -0,0 +1,51 @@
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    +
    +create table pcr_t1 (key int, value string) partitioned by (ds string);
    +insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
    +insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
    +
    +create table pcr_t2 (ds string, key int, value string);
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08';
    +from pcr_t1
    +insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
    +
    +set hive.optimize.point.lookup.min=2;
    +set hive.optimize.point.lookup.extract=true;
    +
    +explain extended
    +select key, value, ds
    +from pcr_t1
    +where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2)
    +order by key, value, ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t1 t2
    +on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09'
    +order by t1.key;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t1.ds='2000-04-08' and t2.key=1) or (t1.ds='2000-04-09' and t2.key=2)
    +order by t2.key, t2.value, t1.ds;
    +
    +explain extended
    +select *
    +from pcr_t1 t1 join pcr_t2 t2
    +where (t2.ds='2000-04-08' and t1.key=1) or (t2.ds='2000-04-09' and t1.key=2)
    +order by t1.key, t1.value, t2.ds;
    +
    +drop table pcr_t1;
    +drop table pcr_t2;
    +drop table pcr_t3;
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    index 06515da..9fc3c8d 100644
    --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
    @@ -1134,15 +1134,11 @@ STAGE PLANS:
                alias: alterdynamic_part_table
                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
    - Filter Operator
    - isSamplingPred: false
    - predicate: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean)
    + Select Operator
    + expressions: intcol (type: string)
    + outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - Select Operator
    - expressions: intcol (type: string)
    - outputColumnNames: _col0
    - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
    - ListSink
    + ListSink

      PREHOOK: query: select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__')
      PREHOOK: type: QUERY

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    index 9e0e78a..054b573 100644
    --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    @@ -678,15 +678,15 @@ STAGE PLANS:
                  alias: loc_orc
                  Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
    - predicate: (state) IN ('OH', 'CA') (type: boolean)
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + predicate: ((state = 'OH') or (state = 'CA')) (type: boolean)
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                      outputColumnNames: _col0, _col1, _col2, _col3
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
    + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/flatten_and_or.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/flatten_and_or.q.out b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    index 5f25daa..9c51ff3 100644
    --- a/ql/src/test/results/clientpositive/flatten_and_or.q.out
    +++ b/ql/src/test/results/clientpositive/flatten_and_or.q.out
    @@ -44,15 +44,15 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string)
                      outputColumnNames: _col0
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pcr.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out
    index 4c9ea77..d7c40a3 100644
    --- a/ql/src/test/results/clientpositive/pcr.q.out
    +++ b/ql/src/test/results/clientpositive/pcr.q.out
    @@ -2475,16 +2475,16 @@ STAGE PLANS:
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
    - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: int), value (type: string), ds (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                        sort order: +++
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                        tag: -1
                        auto parallelism: false
            Path -> Alias:
    @@ -2588,13 +2588,13 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
      #### A masked pattern was here ####
                  NumFilesPerFileSink: 1
    - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
      #### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/b247cac4/ql/src/test/results/clientpositive/pointlookup.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
    new file mode 100644
    index 0000000..7e19be4
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/pointlookup.q.out
    @@ -0,0 +1,198 @@
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean)
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT key
    +FROM src
    +WHERE
    + ((key = '0'
    + AND value = '8') OR (key = '1'
    + AND value = '5') OR (key = '2'
    + AND value = '6') OR (key = '3'
    + AND value = '8') OR (key = '4'
    + AND value = '1') OR (key = '5'
    + AND value = '6') OR (key = '6'
    + AND value = '1') OR (key = '7'
    + AND value = '1') OR (key = '8'
    + AND value = '1') OR (key = '9'
    + AND value = '1') OR (key = '10'
    + AND value = '3'))
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean)
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: key (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
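
The three plans above show the same WHERE clause in its raw OR-chain form, as a single struct IN, and with an extra single-column IN extracted in front of the struct IN, depending on the point-lookup optimizer settings the test exercises. Conceptually, the struct IN form turns the filter into one tuple-membership probe per row. A minimal, Hive-free Java sketch of that idea (names and literals are illustrative, not code from the patch):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class TupleInSketch {
  public static void main(String[] args) {
    // The struct IN predicate amounts to one set-membership test per row
    // instead of walking an OR chain of (key = ..) AND (value = ..) pairs.
    Set<List<String>> wanted = new HashSet<>(Arrays.asList(
        Arrays.asList("0", "8"),
        Arrays.asList("1", "5"),
        Arrays.asList("2", "6")));

    String[][] rows = { {"1", "5"}, {"4", "4"}, {"2", "6"} };
    for (String[] row : rows) {
      boolean matches = wanted.contains(Arrays.asList(row[0], row[1]));
      System.out.println(Arrays.toString(row) + " -> " + matches);
    }
  }
}
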
    Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/hive


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9763c9dd
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9763c9dd
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9763c9dd

    Branch: refs/heads/beeline-cli
    Commit: 9763c9dd31bd5939db3ca50e75bb97955b411f6d
    Parents: da95f63 8f930e5
    Author: Dmitry Tolpeko <dmtolpeko@gmail.com>
    Authored: Tue Sep 1 05:00:33 2015 -0700
    Committer: Dmitry Tolpeko <dmtolpeko@gmail.com>
    Committed: Tue Sep 1 05:00:33 2015 -0700

    ----------------------------------------------------------------------
      .../java/org/apache/hive/beeline/BeeLine.java | 13 +-
      .../org/apache/hive/beeline/BeeLineOpts.java | 12 +-
      .../org/apache/hive/beeline/HiveSchemaTool.java | 14 +-
      .../org/apache/hadoop/hive/conf/HiveConf.java | 76 +-
      data/conf/tez/hive-site.xml | 9 +
      .../hive/hbase/HiveHBaseInputFormatUtil.java | 50 +-
      .../queries/positive/hbase_null_first_col.q | 22 +
      .../results/positive/hbase_null_first_col.q.out | 109 ++
      .../vectorization/VectorizationBench.java | 93 +
      .../org/apache/hive/jdbc/HiveConnection.java | 30 +-
      jdbc/src/java/org/apache/hive/jdbc/Utils.java | 117 +-
      .../hive/jdbc/ZooKeeperHiveClientHelper.java | 104 +-
      .../hadoop/hive/metastore/HiveAlterHandler.java | 2 +-
      .../hadoop/hive/metastore/HiveMetaStore.java | 6 +-
      .../hive/metastore/MetaStoreDirectSql.java | 40 +-
      .../hadoop/hive/metastore/MetaStoreUtils.java | 17 +-
      .../hadoop/hive/ql/exec/FunctionRegistry.java | 61 +-
      .../hadoop/hive/ql/exec/ScriptOperator.java | 85 +-
      .../hive/ql/exec/tez/TezSessionState.java | 2 +
      .../ql/exec/vector/VectorHashKeyWrapper.java | 2 +-
      .../ql/exec/vector/VectorizationContext.java | 7 +-
      .../expressions/CastStringGroupToString.java | 40 +
      .../ql/exec/vector/expressions/ColAndCol.java | 34 +-
      .../ql/exec/vector/expressions/ColOrCol.java | 42 +-
      .../exec/vector/expressions/CuckooSetBytes.java | 2 +-
      .../hive/ql/exec/vector/expressions/NotCol.java | 14 +-
      ...VectorMapJoinInnerBigOnlyStringOperator.java | 4 +-
      .../VectorMapJoinInnerStringOperator.java | 4 +-
      .../VectorMapJoinLeftSemiStringOperator.java | 4 +-
      .../VectorMapJoinOuterStringOperator.java | 4 +-
      .../apache/hadoop/hive/ql/io/orc/FileDump.java | 48 +-
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 +-
      .../apache/hadoop/hive/ql/io/orc/Reader.java | 6 +
      .../hadoop/hive/ql/io/orc/ReaderImpl.java | 281 ++-
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 -
      .../hive/ql/io/orc/TreeReaderFactory.java | 18 +-
      .../hive/ql/io/parquet/LeafFilterFactory.java | 43 +-
      .../read/ParquetFilterPredicateConverter.java | 35 +-
      .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 3 -
      .../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 +-
      .../hadoop/hive/ql/lib/ForwardWalker.java | 33 +-
      .../ql/metadata/SessionHiveMetaStoreClient.java | 2 +-
      .../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
      .../hive/ql/optimizer/ConstantPropagate.java | 10 +-
      .../optimizer/ConstantPropagateProcFactory.java | 100 +-
      .../hadoop/hive/ql/optimizer/IndexUtils.java | 13 +-
      .../hadoop/hive/ql/optimizer/Optimizer.java | 17 +-
      .../hive/ql/optimizer/PointLookupOptimizer.java | 102 +-
      .../calcite/translator/HiveOpConverter.java | 53 +-
      .../ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +-
      .../hadoop/hive/ql/parse/LeadLagInfo.java | 4 +-
      .../hive/ql/parse/LoadSemanticAnalyzer.java | 38 +-
      .../hadoop/hive/ql/plan/ExprNodeDesc.java | 23 +-
      .../hadoop/hive/ql/ppd/ExprWalkerInfo.java | 136 +-
      .../hive/ql/ppd/ExprWalkerProcFactory.java | 92 +-
      .../hadoop/hive/ql/ppd/OpProcFactory.java | 11 +-
      .../hadoop/hive/ql/udf/generic/GenericUDF.java | 14 +-
      .../ql/udf/generic/GenericUDFBaseNumeric.java | 4 +-
      .../hive/ql/udf/generic/GenericUDFBasePad.java | 8 +-
      .../hive/ql/udf/generic/GenericUDFNvl.java | 2 +-
      .../hive/ql/udf/generic/GenericUDFOPAnd.java | 4 +
      .../hive/ql/udf/generic/GenericUDFOPEqual.java | 4 +
      .../generic/GenericUDFOPEqualOrGreaterThan.java | 4 +
      .../generic/GenericUDFOPEqualOrLessThan.java | 4 +
      .../ql/udf/generic/GenericUDFOPGreaterThan.java | 4 +
      .../ql/udf/generic/GenericUDFOPLessThan.java | 4 +
      .../ql/udf/generic/GenericUDFOPNotEqual.java | 5 +
      .../ql/udf/generic/GenericUDFOPNotNull.java | 4 +
      .../hive/ql/udf/generic/GenericUDFOPNull.java | 4 +
      .../hive/ql/udf/generic/GenericUDFOPOr.java | 4 +
      ql/src/main/resources/tez-container-log4j2.xml | 49 +
      .../apache/hadoop/hive/ql/TestTxnCommands2.java | 85 +-
      .../hadoop/hive/ql/exec/TestOperators.java | 16 +
      .../hive/ql/io/orc/TestInputOutputFormat.java | 4 +-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 10 +-
      .../hive/ql/io/orc/TestRecordReaderImpl.java | 42 +-
      .../parquet/TestParquetRecordReaderWrapper.java | 50 +-
      .../read/TestParquetFilterPredicate.java | 27 +-
      .../ql/io/sarg/TestConvertAstToSearchArg.java | 128 +-
      .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 22 +-
      .../queries/clientnegative/load_orc_negative3.q | 6 +
      .../queries/clientnegative/nvl_mismatch_type.q | 20 +
      .../clientpositive/cbo_rp_outer_join_ppr.q | 40 +
      .../clientpositive/columnstats_quoting.q | 8 +
      .../queries/clientpositive/flatten_and_or.q | 4 +-
      .../queries/clientpositive/folder_predicate.q | 32 +
      .../test/queries/clientpositive/load_orc_part.q | 4 +
      .../clientpositive/parquet_ppd_partition.q | 9 +
      .../clientpositive/parquet_predicate_pushdown.q | 297 +++-
      .../test/queries/clientpositive/pointlookup.q | 59 +
      .../test/queries/clientpositive/pointlookup2.q | 51 +
      .../queries/clientpositive/selectDistinctStar.q | 2 +
      ql/src/test/queries/clientpositive/structin.q | 6 +
      .../clientpositive/unionall_unbalancedppd.q | 3 +
      .../clientpositive/vector_cast_constant.q | 4 +-
      .../queries/clientpositive/vectorized_casts.q | 6 +
      .../clientnegative/char_pad_convert_fail0.q.out | 2 +-
      .../clientnegative/char_pad_convert_fail1.q.out | 2 +-
      .../clientnegative/char_pad_convert_fail3.q.out | 2 +-
      .../clientnegative/load_orc_negative3.q.out | 25 +
      .../clientnegative/nvl_mismatch_type.q.out | 43 +
      .../alter_partition_coltype.q.out | 12 +-
      .../clientpositive/annotate_stats_filter.q.out | 18 +-
      .../cbo_rp_outer_join_ppr.q.java1.7.out | 855 +++++++++
      .../clientpositive/columnstats_quoting.q.out | 114 ++
      .../results/clientpositive/decimal_udf.q.out | 18 +-
      .../clientpositive/filter_cond_pushdown.q.out | 32 +-
      .../clientpositive/filter_join_breaktask.q.out | 12 +-
      .../results/clientpositive/flatten_and_or.q.out | 8 +-
      .../test/results/clientpositive/fold_when.q.out | 16 +-
      .../clientpositive/folder_predicate.q.out | 368 ++++
      .../clientpositive/input_testxpath2.q.out | 2 +-
      .../list_bucket_query_oneskew_3.q.out | 6 +-
      .../results/clientpositive/load_orc_part.q.out | 18 +
      .../clientpositive/parquet_ppd_partition.q.out | 47 +
      .../parquet_predicate_pushdown.q.out | 1309 +++++++++++++-
      ql/src/test/results/clientpositive/pcr.q.out | 12 +-
      .../results/clientpositive/pointlookup.q.out | 198 +++
      .../results/clientpositive/pointlookup2.q.out | 1647 ++++++++++++++++++
      .../results/clientpositive/ppd_transform.q.out | 12 +-
      .../clientpositive/rand_partitionpruner3.q.out | 12 +-
      .../clientpositive/selectDistinctStar.q.out | 8 +-
      .../clientpositive/select_unquote_not.q.out | 8 +-
      .../spark/filter_join_breaktask.q.out | 12 +-
      .../test/results/clientpositive/spark/pcr.q.out | 12 +-
      .../clientpositive/spark/ppd_transform.q.out | 12 +-
      .../spark/vector_cast_constant.q.java1.7.out | 41 +-
      .../clientpositive/spark/vectorized_case.q.out | 2 +-
      .../test/results/clientpositive/structin.q.out | 44 +
      .../clientpositive/tez/explainuser_1.q.out | 2 +-
      .../tez/filter_join_breaktask.q.out | 12 +-
      .../clientpositive/tez/selectDistinctStar.q.out | 8 +-
      .../tez/vector_cast_constant.q.java1.7.out | 21 +-
      .../tez/vector_char_mapjoin1.q.out | 1 +
      .../clientpositive/tez/vector_decimal_udf.q.out | 24 +-
      .../tez/vector_varchar_mapjoin1.q.out | 1 +
      .../clientpositive/tez/vectorized_case.q.out | 2 +-
      .../clientpositive/tez/vectorized_casts.q.out | 99 +-
      .../clientpositive/udf_isnull_isnotnull.q.out | 2 +-
      .../test/results/clientpositive/udf_size.q.out | 2 +-
      .../clientpositive/unionall_unbalancedppd.q.out | 14 +-
      .../vector_cast_constant.q.java1.7.out | 32 +-
      .../clientpositive/vector_char_mapjoin1.q.out | 1 +
      .../clientpositive/vector_decimal_udf.q.out | 24 +-
      .../vector_varchar_mapjoin1.q.out | 1 +
      .../clientpositive/vectorized_case.q.out | 9 +-
      .../clientpositive/vectorized_casts.q.out | 66 +-
      .../serde2/avro/AvroLazyObjectInspector.java | 19 +-
      .../hadoop/hive/serde2/avro/InstanceCache.java | 17 +-
      .../objectinspector/ObjectInspectorFactory.java | 61 +-
      .../ReflectionStructObjectInspector.java | 60 +-
      .../StandardStructObjectInspector.java | 7 +-
      .../ThriftUnionObjectInspector.java | 28 +-
      .../avro/TestAvroLazyObjectInspector.java | 59 +
      .../TestReflectionObjectInspectors.java | 71 +-
      .../apache/hive/service/server/HiveServer2.java | 74 +-
      shims/0.20S/pom.xml | 8 +-
      .../hadoop/hive/shims/Hadoop20SShims.java | 35 +-
      shims/0.23/pom.xml | 21 +-
      .../apache/hadoop/hive/shims/Hadoop23Shims.java | 79 +-
      .../apache/hadoop/hive/shims/HadoopShims.java | 22 +
      .../hadoop/hive/shims/HadoopShimsSecure.java | 32 +
      .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 3 +-
      163 files changed, 7857 insertions(+), 1241 deletions(-)
    ----------------------------------------------------------------------
    HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f530f44d
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f530f44d
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f530f44d

    Branch: refs/heads/beeline-cli
    Commit: f530f44d1d95c2da2485d53f0855f8f8e0646005
    Parents: c0690a6
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Tue Sep 1 11:23:14 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Tue Sep 1 11:23:14 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 169 +++++++++++--------
      .../hive/ql/io/orc/TestInputOutputFormat.java | 13 +-
      2 files changed, 107 insertions(+), 75 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    index 8c138b9..05efc5f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    @@ -28,6 +28,7 @@ import java.util.Map;
      import java.util.NavigableMap;
      import java.util.TreeMap;
      import java.util.concurrent.Callable;
    +import java.util.concurrent.ExecutorCompletionService;
      import java.util.concurrent.ExecutorService;
      import java.util.concurrent.Executors;
      import java.util.concurrent.Future;
    @@ -51,6 +52,7 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat;
      import org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData;
      import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
      import org.apache.hadoop.hive.ql.io.AcidUtils;
    +import org.apache.hadoop.hive.ql.io.AcidUtils.Directory;
      import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
      import org.apache.hadoop.hive.ql.io.InputFormatChecker;
      import org.apache.hadoop.hive.ql.io.RecordIdentifier;
    @@ -371,6 +373,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          private final Configuration conf;
          private static Cache<Path, FileInfo> footerCache;
          private static ExecutorService threadPool = null;
    + private static ExecutorCompletionService<AcidDirInfo> ecs = null;
          private final int numBuckets;
          private final long maxSize;
          private final long minSize;
    @@ -416,6 +419,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
                threadPool = Executors.newFixedThreadPool(numThreads,
                    new ThreadFactoryBuilder().setDaemon(true)
                        .setNameFormat("ORC_GET_SPLITS #%d").build());
    + ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
              }

              if (footerCache == null && cacheStripeDetails) {
    @@ -433,10 +437,34 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          }
        }

    + /**
    + * The full ACID directory information needed for splits; no more calls to HDFS needed.
    + * We could just live with AcidUtils.Directory but...
    + * 1) That doesn't contain the base files.
    + * 2) We save fs for convenience to avoid getting it twice.
    + */
    + @VisibleForTesting
    + static final class AcidDirInfo {
    + public AcidDirInfo(FileSystem fs, Path splitPath, Directory acidInfo,
    + List<HdfsFileStatusWithId> baseOrOriginalFiles) {
    + this.splitPath = splitPath;
    + this.acidInfo = acidInfo;
    + this.baseOrOriginalFiles = baseOrOriginalFiles;
    + this.fs = fs;
    + }
    +
    + final FileSystem fs;
    + final Path splitPath;
    + final AcidUtils.Directory acidInfo;
    + final List<HdfsFileStatusWithId> baseOrOriginalFiles;
    + }
    +
    + @VisibleForTesting
        interface SplitStrategy<T> {
          List<T> getSplits() throws IOException;
        }

    + @VisibleForTesting
        static final class SplitInfo extends ACIDSplitStrategy {
          private final Context context;
          private final FileSystem fs;
    @@ -638,7 +666,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         * Given a directory, get the list of files and blocks in those files.
         * To parallelize file generator use "mapreduce.input.fileinputformat.list-status.num-threads"
         */
    - static final class FileGenerator implements Callable<SplitStrategy> {
    + static final class FileGenerator implements Callable<AcidDirInfo> {
          private final Context context;
          private final FileSystem fs;
          private final Path dir;
    @@ -652,69 +680,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          }

          @Override
    - public SplitStrategy call() throws IOException {
    - final SplitStrategy splitStrategy;
    + public AcidDirInfo call() throws IOException {
            AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir,
                context.conf, context.transactionList, useFileIds);
    - List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
            Path base = dirInfo.getBaseDirectory();
    - List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
    - boolean[] covered = new boolean[context.numBuckets];
    - boolean isOriginal = base == null;
    -
    - // if we have a base to work from
    - if (base != null || !original.isEmpty()) {
    -
    - // find the base files (original or new style)
    - List<HdfsFileStatusWithId> children = original;
    - if (base != null) {
    - children = findBaseFiles(base, useFileIds);
    - }
    -
    - long totalFileSize = 0;
    - for (HdfsFileStatusWithId child : children) {
    - totalFileSize += child.getFileStatus().getLen();
    - AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
    - (child.getFileStatus().getPath(), context.conf);
    - int b = opts.getBucket();
    - // If the bucket is in the valid range, mark it as covered.
    - // I wish Hive actually enforced bucketing all of the time.
    - if (b >= 0 && b < covered.length) {
    - covered[b] = true;
    - }
    - }
    -
    - int numFiles = children.size();
    - long avgFileSize = totalFileSize / numFiles;
    - int totalFiles = context.numFilesCounter.addAndGet(numFiles);
    - switch(context.splitStrategyKind) {
    - case BI:
    - // BI strategy requested through config
    - splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal,
    - deltas, covered);
    - break;
    - case ETL:
    - // ETL strategy requested through config
    - splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal,
    - deltas, covered);
    - break;
    - default:
    - // HYBRID strategy
    - if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
    - splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
    - covered);
    - } else {
    - splitStrategy = new BISplitStrategy(context, fs, dir, children, isOriginal, deltas,
    - covered);
    - }
    - break;
    - }
    - } else {
    - // no base, only deltas
    - splitStrategy = new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
    - }
    -
    - return splitStrategy;
    + // find the base files (original or new style)
    + List<HdfsFileStatusWithId> children = (base == null)
    + ? dirInfo.getOriginalFiles() : findBaseFiles(base, useFileIds);
    + return new AcidDirInfo(fs, dir, dirInfo, children);
          }

          private List<HdfsFileStatusWithId> findBaseFiles(
    @@ -1052,21 +1025,24 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          // use threads to resolve directories into splits
          Context context = new Context(conf, numSplits);
          List<OrcSplit> splits = Lists.newArrayList();
    - List<Future<?>> pathFutures = Lists.newArrayList();
    - List<Future<?>> splitFutures = Lists.newArrayList();
    + List<Future<AcidDirInfo>> pathFutures = Lists.newArrayList();
    + List<Future<List<OrcSplit>>> splitFutures = Lists.newArrayList();

          // multi-threaded file statuses and split strategy
          boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
    - for (Path dir : getInputPaths(conf)) {
    + Path[] paths = getInputPaths(conf);
    + for (Path dir : paths) {
            FileSystem fs = dir.getFileSystem(conf);
            FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
    - pathFutures.add(context.threadPool.submit(fileGenerator));
    + pathFutures.add(Context.ecs.submit(fileGenerator));
          }

          // complete path futures and schedule split generation
          try {
    - for (Future<?> pathFuture : pathFutures) {
    - SplitStrategy splitStrategy = (SplitStrategy) pathFuture.get();
    + for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
    + AcidDirInfo adi = Context.ecs.take().get();
    + SplitStrategy splitStrategy = determineSplitStrategy(
    + context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);

              if (isDebugEnabled) {
                LOG.debug(splitStrategy);
    @@ -1075,7 +1051,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
              if (splitStrategy instanceof ETLSplitStrategy) {
                List<SplitInfo> splitInfos = splitStrategy.getSplits();
                for (SplitInfo splitInfo : splitInfos) {
    - splitFutures.add(context.threadPool.submit(new SplitGenerator(splitInfo)));
    + splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
                }
              } else {
                splits.addAll(splitStrategy.getSplits());
    @@ -1083,8 +1059,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
            }

            // complete split futures
    - for (Future<?> splitFuture : splitFutures) {
    - splits.addAll((Collection<? extends OrcSplit>) splitFuture.get());
    + for (Future<List<OrcSplit>> splitFuture : splitFutures) {
    + splits.addAll(splitFuture.get());
            }
          } catch (Exception e) {
            cancelFutures(pathFutures);
    @@ -1106,8 +1082,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          return splits;
        }

    - private static void cancelFutures(List<Future<?>> futures) {
    - for (Future future : futures) {
    + private static <T> void cancelFutures(List<Future<T>> futures) {
    + for (Future<T> future : futures) {
            future.cancel(true);
          }
        }
    @@ -1375,6 +1351,55 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
              directory);
        }

    +
    + @VisibleForTesting
    + static SplitStrategy determineSplitStrategy(Context context, FileSystem fs, Path dir,
    + AcidUtils.Directory dirInfo, List<HdfsFileStatusWithId> baseOrOriginalFiles) {
    + Path base = dirInfo.getBaseDirectory();
    + List<HdfsFileStatusWithId> original = dirInfo.getOriginalFiles();
    + List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
    + boolean[] covered = new boolean[context.numBuckets];
    + boolean isOriginal = base == null;
    +
    + // if we have a base to work from
    + if (base != null || !original.isEmpty()) {
    + long totalFileSize = 0;
    + for (HdfsFileStatusWithId child : baseOrOriginalFiles) {
    + totalFileSize += child.getFileStatus().getLen();
    + AcidOutputFormat.Options opts = AcidUtils.parseBaseBucketFilename
    + (child.getFileStatus().getPath(), context.conf);
    + int b = opts.getBucket();
    + // If the bucket is in the valid range, mark it as covered.
    + // I wish Hive actually enforced bucketing all of the time.
    + if (b >= 0 && b < covered.length) {
    + covered[b] = true;
    + }
    + }
    +
    + int numFiles = baseOrOriginalFiles.size();
    + long avgFileSize = totalFileSize / numFiles;
    + int totalFiles = context.numFilesCounter.addAndGet(numFiles);
    + switch(context.splitStrategyKind) {
    + case BI:
    + // BI strategy requested through config
    + return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
    + case ETL:
    + // ETL strategy requested through config
    + return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
    + default:
    + // HYBRID strategy
    + if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
    + return new ETLSplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
    + } else {
    + return new BISplitStrategy(context, fs, dir, baseOrOriginalFiles, isOriginal, deltas, covered);
    + }
    + }
    + } else {
    + // no base, only deltas
    + return new ACIDSplitStrategy(dir, context.numBuckets, deltas, covered);
    + }
    + }
    +
        @Override
        public RawReader<OrcStruct> getRawReader(Configuration conf,
                                                 boolean collapseEvents,

    http://git-wip-us.apache.org/repos/asf/hive/blob/f530f44d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    index ce86cd8..8ba4d2e 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    @@ -484,7 +484,7 @@ public class TestInputOutputFormat {
                    conf, n);
                OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
                    context, fs, new MockPath(fs, "mock:/a/b"), false);
    - final SplitStrategy splitStrategy = gen.call();
    + final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
                assertTrue(
                    String.format(
                        "Split strategy for %d files x %d size for %d splits", c, s,
    @@ -508,7 +508,7 @@ public class TestInputOutputFormat {
          OrcInputFormat.FileGenerator gen =
            new OrcInputFormat.FileGenerator(context, fs,
                new MockPath(fs, "mock:/a/b"), false);
    - SplitStrategy splitStrategy = gen.call();
    + OrcInputFormat.SplitStrategy splitStrategy = createSplitStrategy(context, gen);
          assertEquals(true, splitStrategy instanceof OrcInputFormat.BISplitStrategy);

          conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
    @@ -521,11 +521,18 @@ public class TestInputOutputFormat {
              new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
          gen = new OrcInputFormat.FileGenerator(context, fs,
                  new MockPath(fs, "mock:/a/b"), false);
    - splitStrategy = gen.call();
    + splitStrategy = createSplitStrategy(context, gen);
          assertEquals(true, splitStrategy instanceof OrcInputFormat.ETLSplitStrategy);

        }

    + private OrcInputFormat.SplitStrategy createSplitStrategy(
    + OrcInputFormat.Context context, OrcInputFormat.FileGenerator gen) throws IOException {
    + OrcInputFormat.AcidDirInfo adi = gen.call();
    + return OrcInputFormat.determineSplitStrategy(
    + context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);
    + }
    +
        public static class MockBlock {
          int offset;
          int length;
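
For context on the getSplits() changes above: directory listing results are now drained through an ExecutorCompletionService, so AcidDirInfo objects are consumed in the order the FileGenerator tasks finish rather than the order they were submitted, and split generation for already-listed directories can be scheduled while slower listings are still in flight. A minimal, self-contained sketch of that JDK pattern (class and variable names here are illustrative, not taken from the patch):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class CompletionOrderSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    // The completion service wraps the pool; submit() parks each finished
    // Future on an internal queue that take() drains in completion order.
    ExecutorCompletionService<String> ecs = new ExecutorCompletionService<>(pool);

    List<Callable<String>> scans = new ArrayList<>();
    for (int i = 0; i < 8; i++) {
      final int dir = i;
      scans.add(() -> {
        Thread.sleep((long) (Math.random() * 100)); // simulate uneven listing times
        return "dir-" + dir;
      });
    }
    for (Callable<String> scan : scans) {
      ecs.submit(scan);
    }

    // take() blocks for the next *finished* task, not the next *submitted* one,
    // so fast directories never wait behind slow ones.
    for (int i = 0; i < scans.size(); i++) {
      Future<String> done = ecs.take();
      System.out.println("completed: " + done.get());
    }
    pool.shutdown();
  }
}
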
    HIVE-11701 : Make tez tests AM logs work with new log4j2 changes (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a338f33c
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a338f33c
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a338f33c

    Branch: refs/heads/beeline-cli
    Commit: a338f33cc2b17d90d391466090839f76bebc1163
    Parents: 78e7015
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Mon Aug 31 15:54:08 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Mon Aug 31 15:54:08 2015 -0700

    ----------------------------------------------------------------------
      data/conf/tez/hive-site.xml | 5 +++++
      1 file changed, 5 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/a338f33c/data/conf/tez/hive-site.xml
    ----------------------------------------------------------------------
    diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
    index 2f9415a..b4abe90 100644
    --- a/data/conf/tez/hive-site.xml
    +++ b/data/conf/tez/hive-site.xml
    @@ -258,4 +258,9 @@
        <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
      </property>

    +<property>
    + <name>tez.am.launch.cmd-opts</name>
    + <value> -Dlog4j.configurationFile=tez-container-log4j2.xml -Dtez.container.log.level=INFO -Dtez.container.root.logger=CLA </value>
    +</property>
    +
      </configuration>
    HIVE-11504: Predicate pushing down doesn't work for float type for Parquet (Ferdinand Xu, reviewed by Sergio Pena and Owen O'Malley)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f930e58
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f930e58
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f930e58

    Branch: refs/heads/beeline-cli
    Commit: 8f930e588efd6ec937b9ad20fcf09030ae210ec3
    Parents: a338f33
    Author: Ferdinand Xu <cheng.a.xu@intel.com>
    Authored: Mon Aug 31 21:07:10 2015 -0400
    Committer: Ferdinand Xu <cheng.a.xu@intel.com>
    Committed: Mon Aug 31 21:07:10 2015 -0400

    ----------------------------------------------------------------------
      .../hive/ql/io/parquet/LeafFilterFactory.java | 29 +-
      .../read/TestParquetFilterPredicate.java | 21 +
      .../clientpositive/parquet_ppd_partition.q | 9 +
      .../clientpositive/parquet_predicate_pushdown.q | 297 +++-
      .../clientpositive/parquet_ppd_partition.q.out | 47 +
      .../parquet_predicate_pushdown.q.out | 1309 +++++++++++++++++-
      6 files changed, 1660 insertions(+), 52 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    index 1ceea6e..3e00612 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
    @@ -31,6 +31,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
      import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
      import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
      import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
    +import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
      import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;

      public class LeafFilterFactory {
    @@ -83,6 +84,25 @@ public class LeafFilterFactory {
          }
        }

    + class FloatFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {
    + @Override
    + public FilterPredicate buildPredict(Operator op, Object constant, String columnName) {
    + switch (op) {
    + case LESS_THAN:
    + return lt(floatColumn(columnName), ((Number) constant).floatValue());
    + case IS_NULL:
    + case EQUALS:
    + case NULL_SAFE_EQUALS:
    + return eq(floatColumn(columnName),
    + (constant == null) ? null : ((Number) constant).floatValue());
    + case LESS_THAN_EQUALS:
    + return ltEq(FilterApi.floatColumn(columnName), ((Number) constant).floatValue());
    + default:
    + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
    + }
    + }
    + }
    +
        class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder {

          @Override
    @@ -158,8 +178,13 @@ public class LeafFilterFactory {
              } else {
                return new LongFilterPredicateLeafBuilder();
              }
    - case FLOAT: // float and double
    - return new DoubleFilterPredicateLeafBuilder();
    + case FLOAT:
    + if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
    + PrimitiveType.PrimitiveTypeName.FLOAT) {
    + return new FloatFilterPredicateLeafBuilder();
    + } else {
    + return new DoubleFilterPredicateLeafBuilder();
    + }
            case STRING: // string, char, varchar
              return new BinaryFilterPredicateLeafBuilder();
            case BOOLEAN:
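
As a rough usage sketch of what the new FloatFilterPredicateLeafBuilder enables (assuming the org.apache.parquet.filter2 API on the classpath; the column name "f", the literals, and the and/not composition below are illustrative, not taken from the patch), leaf predicates for a FLOAT column such as "f < 123.2" are now built against floatColumn() with float literals instead of being widened through doubleColumn():

import org.apache.parquet.filter2.predicate.FilterPredicate;

import static org.apache.parquet.filter2.predicate.FilterApi.and;
import static org.apache.parquet.filter2.predicate.FilterApi.eq;
import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.not;

public class FloatPredicateSketch {
  public static void main(String[] args) {
    // f IS NOT NULL  ->  not(eq(f, null));   f < 123.2  ->  lt(f, 123.2f)
    FilterPredicate p = and(
        not(eq(floatColumn("f"), null)),
        lt(floatColumn("f"), 123.2f));
    System.out.println(p);
  }
}
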

    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    index ac5c1a0..2be2596 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/read/TestParquetFilterPredicate.java
    @@ -48,4 +48,25 @@ public class TestParquetFilterPredicate {
          String expected = "and(not(eq(a, null)), not(eq(a, Binary{\"stinger\"})))";
          assertEquals(expected, p.toString());
        }
    +
    + @Test
    + public void testFilterFloatColumns() {
    + MessageType schema =
    + MessageTypeParser.parseMessageType("message test { required float a; required int32 b; }");
    + SearchArgument sarg = SearchArgumentFactory.newBuilder()
    + .startNot()
    + .startOr()
    + .isNull("a", PredicateLeaf.Type.FLOAT)
    + .between("a", PredicateLeaf.Type.FLOAT, 10.2, 20.3)
    + .in("b", PredicateLeaf.Type.LONG, 1L, 2L, 3L)
    + .end()
    + .end()
    + .build();
    +
    + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
    +
    + String expected =
    + "and(and(not(eq(a, null)), not(and(lt(a, 20.3), not(lteq(a, 10.2))))), not(or(or(eq(b, 1), eq(b, 2)), eq(b, 3))))";
    + assertEquals(expected, p.toString());
    + }
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_partition.q b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
    new file mode 100644
    index 0000000..08af84f
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/parquet_ppd_partition.q
    @@ -0,0 +1,9 @@
    +SET hive.optimize.index.filter=true;
    +SET hive.optimize.ppd=true;
    +
    +-- Test predicate with partitioned columns
    +CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
    +ALTER TABLE part1 ADD PARTITION (p='p1');
    +INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
    +SELECT * FROM part1 WHERE p='p1';
    +DROP TABLE part1 PURGE;
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
    index 08af84f..32767e8 100644
    --- a/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
    +++ b/ql/src/test/queries/clientpositive/parquet_predicate_pushdown.q
    @@ -1,9 +1,292 @@
    -SET hive.optimize.index.filter=true;
      SET hive.optimize.ppd=true;

    --- Test predicate with partitioned columns
    -CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET;
    -ALTER TABLE part1 ADD PARTITION (p='p1');
    -INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b');
    -SELECT * FROM part1 WHERE p='p1';
    -DROP TABLE part1 PURGE;
    \ No newline at end of file
    +-- SORT_QUERY_RESULTS
    +CREATE TABLE tbl_pred(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS PARQUET;
    +
    +CREATE TABLE staging(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE;
    +
    +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
    +
    +INSERT INTO TABLE tbl_pred select * from staging;
    +
    +-- no predicate case. the explain plan should not have filter expression in table scan operator
    +
    +SELECT SUM(HASH(t)) FROM tbl_pred;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT SUM(HASH(t)) FROM tbl_pred;
    +SET hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred;
    +SET hive.optimize.index.filter=false;
    +
    +-- all the following queries have predicates which are pushed down to table scan operator if
    +-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
    +-- in table scan operator.
    +
    +SELECT * FROM tbl_pred WHERE t<2 limit 1;
    +SET hive.optimize.index.filter=true;
    +SELECT * FROM tbl_pred WHERE t<2 limit 1;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT * FROM tbl_pred WHERE t>2 limit 1;
    +SET hive.optimize.index.filter=true;
    +SELECT * FROM tbl_pred WHERE t>2 limit 1;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2;
    +SET hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    + ;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    + ;
    +SET hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    + ;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    + ;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s;
    +
    +set hive.optimize.index.filter=true;
    +SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s;
    +set hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    +
    +SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    +
    +SET hive.optimize.index.filter=true;
    +SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    +
    +EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3;
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    +
    +
    +SET hive.optimize.index.filter=true;
    +EXPLAIN SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3;
    +SET hive.optimize.index.filter=false;
    \ No newline at end of file
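The test above repeats each query and its EXPLAIN twice, once with hive.optimize.index.filter off and once with it on; the toggle is what makes the pushed-down predicate surface as a filterExpr: entry on the TableScan in the .q.out files that follow. A minimal standalone sketch of the same check over JDBC is given below; the HiveServer2 URL and credentials are assumptions, and the query is just one of the test's predicates, so this is an illustration of the toggle rather than part of the committed test.

  import java.sql.Connection;
  import java.sql.DriverManager;
  import java.sql.ResultSet;
  import java.sql.Statement;

  public class PpdExplainCheck {
    public static void main(String[] args) throws Exception {
      // Hypothetical HiveServer2 endpoint; adjust host/port/database as needed.
      String url = "jdbc:hive2://localhost:10000/default";
      try (Connection conn = DriverManager.getConnection(url, "hive", "");
           Statement stmt = conn.createStatement()) {
        // Same toggle the .q test issues before each EXPLAIN.
        stmt.execute("SET hive.optimize.index.filter=true");
        ResultSet rs = stmt.executeQuery(
            "EXPLAIN SELECT t, s FROM tbl_pred WHERE t <=> -1 AND s LIKE 'bob%'");
        boolean pushedDown = false;
        while (rs.next()) {
          // EXPLAIN returns one plan line per row; the pushed-down predicate
          // shows up as a filterExpr: entry under the TableScan operator.
          if (rs.getString(1).contains("filterExpr:")) {
            pushedDown = true;
          }
        }
        System.out.println("predicate pushed to table scan: " + pushedDown);
      }
    }
  }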

    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
    new file mode 100644
    index 0000000..4186618
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/parquet_ppd_partition.q.out
    @@ -0,0 +1,47 @@
    +PREHOOK: query: -- Test predicate with partitioned columns
    +CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@part1
    +POSTHOOK: query: -- Test predicate with partitioned columns
    +CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@part1
    +PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
    +PREHOOK: type: ALTERTABLE_ADDPARTS
    +PREHOOK: Output: default@part1
    +POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
    +POSTHOOK: type: ALTERTABLE_ADDPARTS
    +POSTHOOK: Output: default@part1
    +POSTHOOK: Output: default@part1@p=p1
    +PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__1
    +PREHOOK: Output: default@part1@p=p1
    +POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__1
    +POSTHOOK: Output: default@part1@p=p1
    +POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    +POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@part1
    +PREHOOK: Input: default@part1@p=p1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@part1
    +POSTHOOK: Input: default@part1@p=p1
    +#### A masked pattern was here ####
    +1 a p1
    +2 b p1
    +PREHOOK: query: DROP TABLE part1 PURGE
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@part1
    +PREHOOK: Output: default@part1
    +POSTHOOK: query: DROP TABLE part1 PURGE
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@part1
    +POSTHOOK: Output: default@part1

    http://git-wip-us.apache.org/repos/asf/hive/blob/8f930e58/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
    index 4186618..1dc2937 100644
    --- a/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
    +++ b/ql/src/test/results/clientpositive/parquet_predicate_pushdown.q.out
    @@ -1,47 +1,1270 @@
    -PREHOOK: query: -- Test predicate with partitioned columns
    -CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +CREATE TABLE tbl_pred(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS PARQUET
      PREHOOK: type: CREATETABLE
      PREHOOK: Output: database:default
    -PREHOOK: Output: default@part1
    -POSTHOOK: query: -- Test predicate with partitioned columns
    -CREATE TABLE part1 (id int, content string) PARTITIONED BY (p string) STORED AS PARQUET
    +PREHOOK: Output: default@tbl_pred
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +CREATE TABLE tbl_pred(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS PARQUET
      POSTHOOK: type: CREATETABLE
      POSTHOOK: Output: database:default
    -POSTHOOK: Output: default@part1
    -PREHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
    -PREHOOK: type: ALTERTABLE_ADDPARTS
    -PREHOOK: Output: default@part1
    -POSTHOOK: query: ALTER TABLE part1 ADD PARTITION (p='p1')
    -POSTHOOK: type: ALTERTABLE_ADDPARTS
    -POSTHOOK: Output: default@part1
    -POSTHOOK: Output: default@part1@p=p1
    -PREHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
    -PREHOOK: type: QUERY
    -PREHOOK: Input: default@values__tmp__table__1
    -PREHOOK: Output: default@part1@p=p1
    -POSTHOOK: query: INSERT INTO TABLE part1 PARTITION (p='p1') VALUES (1, 'a'), (2, 'b')
    -POSTHOOK: type: QUERY
    -POSTHOOK: Input: default@values__tmp__table__1
    -POSTHOOK: Output: default@part1@p=p1
    -POSTHOOK: Lineage: part1 PARTITION(p=p1).content SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    -POSTHOOK: Lineage: part1 PARTITION(p=p1).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    -PREHOOK: query: SELECT * FROM part1 WHERE p='p1'
    -PREHOOK: type: QUERY
    -PREHOOK: Input: default@part1
    -PREHOOK: Input: default@part1@p=p1
    -#### A masked pattern was here ####
    -POSTHOOK: query: SELECT * FROM part1 WHERE p='p1'
    -POSTHOOK: type: QUERY
    -POSTHOOK: Input: default@part1
    -POSTHOOK: Input: default@part1@p=p1
    -#### A masked pattern was here ####
    -1 a p1
    -2 b p1
    -PREHOOK: query: DROP TABLE part1 PURGE
    -PREHOOK: type: DROPTABLE
    -PREHOOK: Input: default@part1
    -PREHOOK: Output: default@part1
    -POSTHOOK: query: DROP TABLE part1 PURGE
    -POSTHOOK: type: DROPTABLE
    -POSTHOOK: Input: default@part1
    -POSTHOOK: Output: default@part1
    +POSTHOOK: Output: default@tbl_pred
    +PREHOOK: query: CREATE TABLE staging(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@staging
    +POSTHOOK: query: CREATE TABLE staging(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@staging
    +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@staging
    +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@staging
    +PREHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@staging
    +PREHOOK: Output: default@tbl_pred
    +POSTHOOK: query: INSERT INTO TABLE tbl_pred select * from staging
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@staging
    +POSTHOOK: Output: default@tbl_pred
    +POSTHOOK: Lineage: tbl_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
    +POSTHOOK: Lineage: tbl_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
    +PREHOOK: query: -- no predicate case. the explain plan should not have filter expression in table scan operator
    +
    +SELECT SUM(HASH(t)) FROM tbl_pred
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- no predicate case. the explain plan should not have filter expression in table scan operator
    +
    +SELECT SUM(HASH(t)) FROM tbl_pred
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +62430
    +PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +62430
    +PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: hash(t) (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col0)
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: hash(t) (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col0)
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: -- all the following queries have predicates which are pushed down to table scan operator if
    +-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
    +-- in table scan operator.
    +
    +SELECT * FROM tbl_pred WHERE t<2 limit 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- all the following queries have predicates which are pushed down to table scan operator if
    +-- hive.optimize.index.filter is set to true. the explain plan should show filter expression
    +-- in table scan operator.
    +
    +SELECT * FROM tbl_pred WHERE t<2 limit 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
    +PREHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT * FROM tbl_pred WHERE t<2 limit 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
    +PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
    +PREHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT * FROM tbl_pred WHERE t>2 limit 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
    +PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-8
    +PREHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-8
    +PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
    + Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: hash(t) (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col0)
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT SUM(HASH(t)) FROM tbl_pred
    + WHERE t IS NOT NULL
    + AND t < 0
    + AND t > -2
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean)
    + Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: hash(t) (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 116 Data size: 1276 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col0)
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-1 bob laertes
    +-1 bob young
    +PREHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +-1 bob laertes
    +-1 bob young
    +PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: -1 (type: tinyint), s (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE t <=> -1
    + AND s IS NOT NULL
    + AND s LIKE 'bob%'
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((t = -1) and s is not null) and (s like 'bob%')) (type: boolean)
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: -1 (type: tinyint), s (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +26 bob ovid
    +26 bob quirinius
    +27 bob ovid
    +PREHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +26 bob ovid
    +26 bob quirinius
    +27 bob ovid
    +PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), s (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: tinyint), _col1 (type: string)
    + sort order: ++
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, s FROM tbl_pred
    + WHERE s IS NOT NULL
    + AND s LIKE 'bob%'
    + AND t NOT IN (-1,-2,-3)
    + AND t BETWEEN 25 AND 30
    + SORT BY t,s
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((s is not null and (s like 'bob%')) and (not (t) IN (-1, -2, -3))) and t BETWEEN 25 AND 30) (type: boolean)
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), s (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: tinyint), _col1 (type: string)
    + sort order: ++
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 65 Data size: 715 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +101 327 11.48 gabriella ellison
    +15 334 11.12 jessica robinson
    +7 320 11.54 bob ellison
    +PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +101 327 11.48 gabriella ellison
    +15 334 11.12 jessica robinson
    +7 320 11.54 bob ellison
    +PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: 3
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + ORDER BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((((((d >= 10.0) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 4 Data size: 44 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: 3
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +15 334 11.12 jessica robinson
    +PREHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +15 334 11.12 jessica robinson
    +PREHOOK: query: SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@tbl_pred
    +#### A masked pattern was here ####
    +99.68 65658 4294967503
    +99.91 65763 4294967324
    +99.92 65661 4294967404
    +PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: 3
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT t, si, d, s FROM tbl_pred
    + WHERE t > 10
    + AND t <> 101
    + AND d >= ROUND(9.99)
    + AND d < 12
    + AND t IS NOT NULL
    + AND s LIKE '%son'
    + AND s NOT LIKE '%car%'
    + AND t > 0
    + AND si BETWEEN 300 AND 400
    + SORT BY s DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((((((((t > 10) and (t <> 101)) and (d >= 10.0)) and (d < 12.0)) and (s like '%son')) and (not (s like '%car%'))) and (t > 0)) and si BETWEEN 300 AND 400) (type: boolean)
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col3 (type: string)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: 3
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT f, i, b FROM tbl_pred
    + WHERE f IS NOT NULL
    + AND f < 123.2
    + AND f > 1.92
    + AND f >= 9.99
    + AND f BETWEEN 1.92 AND 123.2
    + AND i IS NOT NULL
    + AND i < 67627
    + AND i > 60627
    + AND i >= 60626
    + AND i BETWEEN 60626 AND 67627
    + AND b IS NOT NULL
    + AND b < 4294967861
    + AND b > 4294967261
    + AND b >= 4294967260
    + AND b BETWEEN 4294967261 AND 4294967861
    + SORT BY f DESC
    + LIMIT 3
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: tbl_pred
    + filterExpr: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
    + Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((((((((((((f < 123.2) and (f > 1.92)) and (f >= 9.99)) and f BETWEEN 1.92 AND 123.2) and (i < 67627)) and (i > 60627)) and (i >= 60626)) and i BETWEEN 60626 AND 67627) and (b < 4294967861)) and (b > 4294967261)) and (b >= 4294967260)) and b BETWEEN 4294967261 AND 4294967861) (type: boolean)
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: f (type: float), i (type: int), b (type: bigint)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: float)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: int), _col2 (type: bigint)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col0 (type: float)
    + sort order: -
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: int), _col2 (type: bigint)
    + Reduce Operator Tree:
    + Select Operator
    + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + Limit
    + Number of rows: 3
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: 3
    + Processor Tree:
    + ListSink
    +
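In the plans above, the filterExpr: attached to the TableScan is the predicate that the Parquet reader can use for row-group elimination. As a rough, hand-written illustration only, the integer range from the last query could be expressed with parquet-mr's FilterApi as sketched below; this is not the code path Hive itself uses, the package names are as in recent parquet-mr releases, and the column name is simply taken from the test table.

  import org.apache.parquet.filter2.predicate.FilterApi;
  import org.apache.parquet.filter2.predicate.FilterPredicate;
  import org.apache.parquet.filter2.predicate.Operators.IntColumn;

  public class RangePredicateSketch {
    public static void main(String[] args) {
      // Column "i" from tbl_pred; mirrors i > 60627 AND i < 67627 in the test query.
      IntColumn i = FilterApi.intColumn("i");
      FilterPredicate pred =
          FilterApi.and(FilterApi.gt(i, 60627), FilterApi.lt(i, 67627));
      // A reader configured with this predicate can skip row groups whose
      // column statistics show that no value can satisfy it.
      System.out.println(pred);
    }
  }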
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11689 : minor flow changes to ORC split generation (Sergey Shelukhin, reviewed by Prasanth Jayachandran and Swarnim Kulkarni) ADDENDUM


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3ff3c6f1
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3ff3c6f1
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3ff3c6f1

    Branch: refs/heads/beeline-cli
    Commit: 3ff3c6f19ab82390f44c88cde5afea32a0299986
    Parents: dbdd611
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Wed Sep 2 11:01:35 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Wed Sep 2 11:01:35 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 18 ++++++++++--------
      1 file changed, 10 insertions(+), 8 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/3ff3c6f1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    index 05efc5f..cf8694e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    @@ -28,6 +28,7 @@ import java.util.Map;
      import java.util.NavigableMap;
      import java.util.TreeMap;
      import java.util.concurrent.Callable;
    +import java.util.concurrent.CompletionService;
      import java.util.concurrent.ExecutorCompletionService;
      import java.util.concurrent.ExecutorService;
      import java.util.concurrent.Executors;
    @@ -373,7 +374,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          private final Configuration conf;
          private static Cache<Path, FileInfo> footerCache;
          private static ExecutorService threadPool = null;
    - private static ExecutorCompletionService<AcidDirInfo> ecs = null;
          private final int numBuckets;
          private final long maxSize;
          private final long minSize;
    @@ -419,7 +419,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
                threadPool = Executors.newFixedThreadPool(numThreads,
                    new ThreadFactoryBuilder().setDaemon(true)
                        .setNameFormat("ORC_GET_SPLITS #%d").build());
    - ecs = new ExecutorCompletionService<AcidDirInfo>(threadPool);
              }

              if (footerCache == null && cacheStripeDetails) {
    @@ -440,7 +439,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
        /**
         * The full ACID directory information needed for splits; no more calls to HDFS needed.
         * We could just live with AcidUtils.Directory but...
    - * 1) That doesn't contain have base files.
    + * 1) That doesn't have base files for the base-directory case.
         * 2) We save fs for convenience to avoid getting it twice.
         */
        @VisibleForTesting
    @@ -1031,17 +1030,18 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          // multi-threaded file statuses and split strategy
          boolean useFileIds = HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
          Path[] paths = getInputPaths(conf);
    + CompletionService<AcidDirInfo> ecs = new ExecutorCompletionService<>(Context.threadPool);
          for (Path dir : paths) {
            FileSystem fs = dir.getFileSystem(conf);
            FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds);
    - pathFutures.add(Context.ecs.submit(fileGenerator));
    + pathFutures.add(ecs.submit(fileGenerator));
          }

          // complete path futures and schedule split generation
          try {
            for (int notIndex = 0; notIndex < paths.length; ++notIndex) {
    - AcidDirInfo adi = Context.ecs.take().get();
    - SplitStrategy splitStrategy = determineSplitStrategy(
    + AcidDirInfo adi = ecs.take().get();
    + SplitStrategy<?> splitStrategy = determineSplitStrategy(
                  context, adi.fs, adi.splitPath, adi.acidInfo, adi.baseOrOriginalFiles);

              if (isDebugEnabled) {
    @@ -1049,12 +1049,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
              }

              if (splitStrategy instanceof ETLSplitStrategy) {
    - List<SplitInfo> splitInfos = splitStrategy.getSplits();
    + List<SplitInfo> splitInfos = ((ETLSplitStrategy)splitStrategy).getSplits();
                for (SplitInfo splitInfo : splitInfos) {
                  splitFutures.add(Context.threadPool.submit(new SplitGenerator(splitInfo)));
                }
              } else {
    - splits.addAll(splitStrategy.getSplits());
    + @SuppressWarnings("unchecked")
    + List<OrcSplit> readySplits = (List<OrcSplit>)splitStrategy.getSplits();
    + splits.addAll(readySplits);
              }
            }
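    In the reshaped flow above, the completion queue is scoped to one getSplits() call (a local CompletionService wrapping the shared thread pool) instead of being a static field shared by every caller. A minimal, self-contained sketch of that submit/take pattern follows; the class, method and task names are illustrative stand-ins, not the actual Hive types.

      import java.util.ArrayList;
      import java.util.List;
      import java.util.concurrent.Callable;
      import java.util.concurrent.CompletionService;
      import java.util.concurrent.ExecutorCompletionService;
      import java.util.concurrent.ExecutorService;
      import java.util.concurrent.Executors;

      public class PerCallCompletionSketch {
        // The pool can stay shared and static; the CompletionService is per call,
        // so results from concurrent callers never interleave.
        private static final ExecutorService POOL = Executors.newFixedThreadPool(4);

        static List<String> listAll(List<String> dirs) throws Exception {
          CompletionService<String> ecs = new ExecutorCompletionService<>(POOL);
          for (final String dir : dirs) {
            ecs.submit(new Callable<String>() {
              @Override
              public String call() {
                return "listed:" + dir; // stands in for the per-directory listing work
              }
            });
          }
          List<String> results = new ArrayList<>();
          for (int i = 0; i < dirs.size(); i++) {
            results.add(ecs.take().get()); // consume in completion order, like the split loop
          }
          return results;
        }
      }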
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11671 : Optimize RuleRegExp in DPP codepath (Rajesh Balamohan, reviewed by Hari Subramaniyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/492c8b1d
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/492c8b1d
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/492c8b1d

    Branch: refs/heads/beeline-cli
    Commit: 492c8b1d88ffcb68ba4f77a3a49ae8fc768cdd7c
    Parents: 1fc9320
    Author: Hari Subramaniyan <harisankar@apache.org>
    Authored: Wed Sep 2 15:54:23 2015 -0700
    Committer: Hari Subramaniyan <harisankar@apache.org>
    Committed: Wed Sep 2 15:54:23 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/lib/RuleRegExp.java | 22 +++++++++++---------
      1 file changed, 12 insertions(+), 10 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/492c8b1d/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
    index c88ed68..fd5f133 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
    @@ -125,13 +125,13 @@ public class RuleRegExp implements Rule {
         */
        private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
          int numElems = (stack != null ? stack.size() : 0);
    - String name = new String("");
          int patLen = patternWithoutWildCardChar.length();
    -
    + StringBuilder name = new StringBuilder(patLen + numElems);
          for (int pos = numElems - 1; pos >= 0; pos--) {
    - name = stack.get(pos).getName() + "%" + name;
    + String nodeName = stack.get(pos).getName() + "%";
    + name.insert(0, nodeName);
            if (name.length() >= patLen) {
    - if (patternWithoutWildCardChar.equals(name)) {
    + if (patternWithoutWildCardChar.contentEquals(name)) {
                return patLen;
              } else {
                return -1;
    @@ -153,13 +153,14 @@ public class RuleRegExp implements Rule {
        private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
          int numElems = (stack != null ? stack.size() : 0);
          for (String pattern : patternORWildChar) {
    - String name = new String("");
            int patLen = pattern.length();

    + StringBuilder name = new StringBuilder(patLen + numElems);
            for (int pos = numElems - 1; pos >= 0; pos--) {
    - name = stack.get(pos).getName() + "%" + name;
    + String nodeName = stack.get(pos).getName() + "%";
    + name.insert(0, nodeName);
              if (name.length() >= patLen) {
    - if (pattern.equals(name)) {
    + if (pattern.contentEquals(name)) {
                  return patLen;
                } else {
                  break;
    @@ -181,11 +182,12 @@ public class RuleRegExp implements Rule {
         * @throws SemanticException
         */
        private int costPatternWithWildCardChar(Stack<Node> stack) throws SemanticException {
    - int numElems = (stack != null ? stack.size() : 0);
    - String name = "";
    + int numElems = (stack != null ? stack.size() : 0);
    + StringBuilder name = new StringBuilder();
          Matcher m = patternWithWildCardChar.matcher("");
          for (int pos = numElems - 1; pos >= 0; pos--) {
    - name = stack.get(pos).getName() + "%" + name;
    + String nodeName = stack.get(pos).getName() + "%";
    + name.insert(0, nodeName);
            m.reset(name);
            if (m.matches()) {
              return name.length();
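    The rewrite above stops building the candidate name by repeated String concatenation, which copies the whole prefix on every stack element, and instead grows a single StringBuilder and compares it with String.contentEquals so no extra String is materialized for the check. A standalone sketch of the same idiom, with made-up operator names standing in for the rule stack:

      import java.util.Arrays;
      import java.util.List;

      public class PrefixMatchSketch {
        // Returns the pattern length on a match and -1 otherwise, mirroring the
        // cost-style contract of the rule matcher.
        static int costWithoutWildcard(List<String> stack, String pattern) {
          StringBuilder name = new StringBuilder(pattern.length() + stack.size());
          for (int pos = stack.size() - 1; pos >= 0; pos--) {
            name.insert(0, stack.get(pos) + "%"); // prepend while walking down from the stack top
            if (name.length() >= pattern.length()) {
              return pattern.contentEquals(name) ? pattern.length() : -1;
            }
          }
          return -1;
        }

        public static void main(String[] args) {
          List<String> stack = Arrays.asList("TS", "FIL", "SEL");
          System.out.println(costWithoutWildcard(stack, "FIL%SEL%")); // prints 8
        }
      }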
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11383: Upgrade Hive to Calcite 1.4 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c40382d4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c40382d4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c40382d4

    Branch: refs/heads/beeline-cli
    Commit: c40382d4c3bd3f0b0c0f1b09affb7d03198f47ba
    Parents: 492c8b1
    Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Authored: Thu Sep 3 11:48:07 2015 +0200
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Sep 3 11:48:07 2015 +0200

    ----------------------------------------------------------------------
      pom.xml | 2 +-
      .../ql/optimizer/calcite/HiveRelOptUtil.java | 23 --
      .../calcite/reloperators/HiveSort.java | 29 +-
      .../rules/HiveJoinProjectTransposeRule.java | 238 ++--------------
      .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
      .../clientpositive/constprog_partitioner.q.out | 30 +-
      .../clientpositive/correlationoptimizer10.q.out | 48 ++--
      .../spark/constprog_partitioner.q.out | 30 +-
      .../clientpositive/spark/subquery_exists.q.out | 12 +-
      .../clientpositive/spark/subquery_in.q.out | 36 +--
      .../spark/vector_mapjoin_reduce.q.out | 22 +-
      .../clientpositive/subquery_exists.q.out | 12 +-
      .../results/clientpositive/subquery_in.q.out | 36 +--
      .../clientpositive/subquery_in_having.q.out | 50 ++--
      .../subquery_unqualcolumnrefs.q.out | 26 +-
      .../results/clientpositive/subquery_views.q.out | 40 +--
      .../clientpositive/tez/explainuser_1.q.out | 278 +++++++++----------
      .../clientpositive/tez/subquery_exists.q.out | 12 +-
      .../clientpositive/tez/subquery_in.q.out | 36 +--
      .../clientpositive/tez/vector_inner_join.q.out | 14 +-
      .../tez/vector_mapjoin_reduce.q.out | 24 +-
      .../clientpositive/vector_inner_join.q.out | 12 +-
      .../clientpositive/vector_mapjoin_reduce.q.out | 26 +-
      23 files changed, 431 insertions(+), 610 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/pom.xml
    ----------------------------------------------------------------------
    diff --git a/pom.xml b/pom.xml
    index 15c2805..b55e86a 100644
    --- a/pom.xml
    +++ b/pom.xml
    @@ -103,7 +103,7 @@
          <antlr.version>3.4</antlr.version>
          <avro.version>1.7.7</avro.version>
          <bonecp.version>0.8.0.RELEASE</bonecp.version>
    - <calcite.version>1.3.0-incubating</calcite.version>
    + <calcite.version>1.4.0-incubating</calcite.version>
          <datanucleus-api-jdo.version>3.2.6</datanucleus-api-jdo.version>
          <datanucleus-core.version>3.2.10</datanucleus-core.version>
          <datanucleus-rdbms.version>3.2.9</datanucleus-rdbms.version>

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
    index 5a5954d..0e282b8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
    @@ -278,29 +278,6 @@ public class HiveRelOptUtil extends RelOptUtil {
          }
        }

    - private static SqlOperator op(SqlKind kind, SqlOperator operator) {
    - switch (kind) {
    - case EQUALS:
    - return SqlStdOperatorTable.EQUALS;
    - case NOT_EQUALS:
    - return SqlStdOperatorTable.NOT_EQUALS;
    - case GREATER_THAN:
    - return SqlStdOperatorTable.GREATER_THAN;
    - case GREATER_THAN_OR_EQUAL:
    - return SqlStdOperatorTable.GREATER_THAN_OR_EQUAL;
    - case LESS_THAN:
    - return SqlStdOperatorTable.LESS_THAN;
    - case LESS_THAN_OR_EQUAL:
    - return SqlStdOperatorTable.LESS_THAN_OR_EQUAL;
    - case IS_DISTINCT_FROM:
    - return SqlStdOperatorTable.IS_DISTINCT_FROM;
    - case IS_NOT_DISTINCT_FROM:
    - return SqlStdOperatorTable.IS_NOT_DISTINCT_FROM;
    - default:
    - return operator;
    - }
    - }
    -
        private static void addJoinKey(
            List<RexNode> joinKeyList,
            RexNode key,

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
    index 18d2838..1df6542 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSort.java
    @@ -22,6 +22,7 @@ import java.util.Map;
      import org.apache.calcite.plan.RelOptCluster;
      import org.apache.calcite.plan.RelTraitSet;
      import org.apache.calcite.rel.RelCollation;
    +import org.apache.calcite.rel.RelCollationTraitDef;
      import org.apache.calcite.rel.RelNode;
      import org.apache.calcite.rel.core.RelFactories;
      import org.apache.calcite.rel.core.Sort;
    @@ -49,6 +50,24 @@ public class HiveSort extends Sort implements HiveRelNode {
              offset, fetch);
        }

    + /**
    + * Creates a HiveSort.
    + *
    + * @param input Input relational expression
    + * @param collation array of sort specifications
    + * @param offset Expression for number of rows to discard before returning
    + * first row
    + * @param fetch Expression for number of rows to fetch
    + */
    + public static HiveSort create(RelNode input, RelCollation collation,
    + RexNode offset, RexNode fetch) {
    + RelOptCluster cluster = input.getCluster();
    + collation = RelCollationTraitDef.INSTANCE.canonize(collation);
    + RelTraitSet traitSet =
    + TraitsUtil.getSortTraitSet(cluster, input.getTraitSet(), collation);
    + return new HiveSort(cluster, traitSet, input, collation, offset, fetch);
    + }
    +
        @Override
        public HiveSort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation,
            RexNode offset, RexNode fetch) {
    @@ -77,9 +96,15 @@ public class HiveSort extends Sort implements HiveRelNode {
        private static class HiveSortRelFactory implements RelFactories.SortFactory {

          @Override
    - public RelNode createSort(RelTraitSet traits, RelNode child, RelCollation collation,
    + public RelNode createSort(RelTraitSet traits, RelNode input, RelCollation collation,
              RexNode offset, RexNode fetch) {
    - return new HiveSort(child.getCluster(), traits, child, collation, offset, fetch);
    + return createSort(input, collation, offset, fetch);
    + }
    +
    + @Override
    + public RelNode createSort(RelNode input, RelCollation collation, RexNode offset,
    + RexNode fetch) {
    + return create(input, collation, offset, fetch);
          }
        }
      }
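    The new static create() above is the single place where the collation is canonized and the trait set is built, and both SortFactory overloads funnel into it. A dependency-free sketch of that funnelling shape; the Sketch* names are invented for illustration and are not Calcite or Hive classes.

      public final class SketchSort {
        private final String collation; // stands in for a canonized collation trait
        private final Long offset;
        private final Long fetch;

        private SketchSort(String collation, Long offset, Long fetch) {
          this.collation = collation;
          this.offset = offset;
          this.fetch = fetch;
        }

        // The one place where normalization happens before construction.
        public static SketchSort create(String collation, Long offset, Long fetch) {
          String canonized = (collation == null) ? "" : collation.trim();
          return new SketchSort(canonized, offset, fetch);
        }

        // The legacy overload that still accepts an explicit trait argument simply
        // forwards, so every construction path ends up in create().
        public static SketchSort createSort(Object ignoredTraits, String collation,
            Long offset, Long fetch) {
          return createSort(collation, offset, fetch);
        }

        public static SketchSort createSort(String collation, Long offset, Long fetch) {
          return create(collation, offset, fetch);
        }
      }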

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
    index fd8f5cb..ac72ee5 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java
    @@ -49,14 +49,14 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
                    operand(HiveProject.class, any()),
                    operand(HiveProject.class, any())),
                "JoinProjectTransposeRule(Project-Project)",
    - HiveProject.DEFAULT_PROJECT_FACTORY);
    + false, HiveProject.DEFAULT_PROJECT_FACTORY);

        public static final HiveJoinProjectTransposeRule LEFT_PROJECT =
            new HiveJoinProjectTransposeRule(
                operand(HiveJoin.class,
                    some(operand(HiveProject.class, any()))),
                "JoinProjectTransposeRule(Project-Other)",
    - HiveProject.DEFAULT_PROJECT_FACTORY);
    + false, HiveProject.DEFAULT_PROJECT_FACTORY);

        public static final HiveJoinProjectTransposeRule RIGHT_PROJECT =
            new HiveJoinProjectTransposeRule(
    @@ -65,219 +65,37 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule {
                    operand(RelNode.class, any()),
                    operand(HiveProject.class, any())),
                "JoinProjectTransposeRule(Other-Project)",
    - HiveProject.DEFAULT_PROJECT_FACTORY);
    + false, HiveProject.DEFAULT_PROJECT_FACTORY);

    - private final ProjectFactory projectFactory;
    -
    -
    - private HiveJoinProjectTransposeRule(
    - RelOptRuleOperand operand,
    - String description, ProjectFactory pFactory) {
    - super(operand, description, pFactory);
    - this.projectFactory = pFactory;
    - }
    -
    - @Override
    - public void onMatch(RelOptRuleCall call) {
    - Join joinRel = call.rel(0);
    - JoinRelType joinType = joinRel.getJoinType();
    -
    - Project leftProj;
    - Project rightProj;
    - RelNode leftJoinChild;
    - RelNode rightJoinChild;
    -
    - // see if at least one input's projection doesn't generate nulls
    - if (hasLeftChild(call)) {
    - leftProj = call.rel(1);
    - leftJoinChild = getProjectChild(call, leftProj, true);
    - } else {
    - leftProj = null;
    - leftJoinChild = call.rel(1);
    - }
    - if (hasRightChild(call)) {
    - rightProj = getRightChild(call);
    - rightJoinChild = getProjectChild(call, rightProj, false);
    - } else {
    - rightProj = null;
    - rightJoinChild = joinRel.getRight();
    - }
    - if ((leftProj == null) && (rightProj == null)) {
    - return;
    - }
    -
    - // Construct two RexPrograms and combine them. The bottom program
    - // is a join of the projection expressions from the left and/or
    - // right projects that feed into the join. The top program contains
    - // the join condition.
    -
    - // Create a row type representing a concatenation of the inputs
    - // underneath the projects that feed into the join. This is the input
    - // into the bottom RexProgram. Note that the join type is an inner
    - // join because the inputs haven't actually been joined yet.
    - RelDataType joinChildrenRowType =
    - Join.deriveJoinRowType(
    - leftJoinChild.getRowType(),
    - rightJoinChild.getRowType(),
    - JoinRelType.INNER,
    - joinRel.getCluster().getTypeFactory(),
    - null,
    - Collections.<RelDataTypeField>emptyList());
    -
    - // Create projection expressions, combining the projection expressions
    - // from the projects that feed into the join. For the RHS projection
    - // expressions, shift them to the right by the number of fields on
    - // the LHS. If the join input was not a projection, simply create
    - // references to the inputs.
    - int nProjExprs = joinRel.getRowType().getFieldCount();
    - List<Pair<RexNode, String>> projects =
    - new ArrayList<Pair<RexNode, String>>();
    - RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
    -
    - createProjectExprs(
    - leftProj,
    - leftJoinChild,
    - 0,
    - rexBuilder,
    - joinChildrenRowType.getFieldList(),
    - projects);
    -
    - List<RelDataTypeField> leftFields =
    - leftJoinChild.getRowType().getFieldList();
    - int nFieldsLeft = leftFields.size();
    - createProjectExprs(
    - rightProj,
    - rightJoinChild,
    - nFieldsLeft,
    - rexBuilder,
    - joinChildrenRowType.getFieldList(),
    - projects);
    -
    - List<RelDataType> projTypes = new ArrayList<RelDataType>();
    - for (int i = 0; i < nProjExprs; i++) {
    - projTypes.add(projects.get(i).left.getType());
    - }
    - RelDataType projRowType =
    - rexBuilder.getTypeFactory().createStructType(
    - projTypes,
    - Pair.right(projects));
    -
    - // create the RexPrograms and merge them
    - RexProgram bottomProgram =
    - RexProgram.create(
    - joinChildrenRowType,
    - Pair.left(projects),
    - null,
    - projRowType,
    - rexBuilder);
    - RexProgramBuilder topProgramBuilder =
    - new RexProgramBuilder(
    - projRowType,
    - rexBuilder);
    - topProgramBuilder.addIdentity();
    - topProgramBuilder.addCondition(joinRel.getCondition());
    - RexProgram topProgram = topProgramBuilder.getProgram();
    - RexProgram mergedProgram =
    - RexProgramBuilder.mergePrograms(
    - topProgram,
    - bottomProgram,
    - rexBuilder);
    + public static final HiveJoinProjectTransposeRule BOTH_PROJECT_INCLUDE_OUTER =
    + new HiveJoinProjectTransposeRule(
    + operand(HiveJoin.class,
    + operand(HiveProject.class, any()),
    + operand(HiveProject.class, any())),
    + "Join(IncludingOuter)ProjectTransposeRule(Project-Project)",
    + true, HiveProject.DEFAULT_PROJECT_FACTORY);

    - // expand out the join condition and construct a new LogicalJoin that
    - // directly references the join children without the intervening
    - // ProjectRels
    - RexNode newCondition =
    - mergedProgram.expandLocalRef(
    - mergedProgram.getCondition());
    - Join newJoinRel =
    - joinRel.copy(joinRel.getTraitSet(), newCondition,
    - leftJoinChild, rightJoinChild, joinRel.getJoinType(),
    - joinRel.isSemiJoinDone());
    + public static final HiveJoinProjectTransposeRule LEFT_PROJECT_INCLUDE_OUTER =
    + new HiveJoinProjectTransposeRule(
    + operand(HiveJoin.class,
    + some(operand(HiveProject.class, any()))),
    + "Join(IncludingOuter)ProjectTransposeRule(Project-Other)",
    + true, HiveProject.DEFAULT_PROJECT_FACTORY);

    - // expand out the new projection expressions; if the join is an
    - // outer join, modify the expressions to reference the join output
    - List<RexNode> newProjExprs = new ArrayList<RexNode>();
    - List<RexLocalRef> projList = mergedProgram.getProjectList();
    - List<RelDataTypeField> newJoinFields =
    - newJoinRel.getRowType().getFieldList();
    - int nJoinFields = newJoinFields.size();
    - int[] adjustments = new int[nJoinFields];
    - for (int i = 0; i < nProjExprs; i++) {
    - RexNode newExpr = mergedProgram.expandLocalRef(projList.get(i));
    - if (joinType != JoinRelType.INNER) {
    - newExpr =
    - newExpr.accept(
    - new RelOptUtil.RexInputConverter(
    - rexBuilder,
    - joinChildrenRowType.getFieldList(),
    - newJoinFields,
    - adjustments));
    - }
    - newProjExprs.add(newExpr);
    - }
    + public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_INCLUDE_OUTER =
    + new HiveJoinProjectTransposeRule(
    + operand(
    + HiveJoin.class,
    + operand(RelNode.class, any()),
    + operand(HiveProject.class, any())),
    + "Join(IncludingOuter)ProjectTransposeRule(Other-Project)",
    + true, HiveProject.DEFAULT_PROJECT_FACTORY);

    - // finally, create the projection on top of the join
    - RelNode newProjRel = projectFactory.createProject(newJoinRel, newProjExprs,
    - joinRel.getRowType().getFieldNames());

    - call.transformTo(newProjRel);
    + private HiveJoinProjectTransposeRule(
    + RelOptRuleOperand operand, String description,
    + boolean includeOuter, ProjectFactory projectFactory) {
    + super(operand, description, includeOuter, projectFactory);
        }

    - /**
    - * Creates projection expressions corresponding to one of the inputs into
    - * the join
    - *
    - * @param projRel the projection input into the join (if it exists)
    - * @param joinChild the child of the projection input (if there is a
    - * projection); otherwise, this is the join input
    - * @param adjustmentAmount the amount the expressions need to be shifted by
    - * @param rexBuilder rex builder
    - * @param joinChildrenFields concatenation of the fields from the left and
    - * right join inputs (once the projections have been
    - * removed)
    - * @param projects Projection expressions &amp; names to be created
    - */
    - private void createProjectExprs(
    - Project projRel,
    - RelNode joinChild,
    - int adjustmentAmount,
    - RexBuilder rexBuilder,
    - List<RelDataTypeField> joinChildrenFields,
    - List<Pair<RexNode, String>> projects) {
    - List<RelDataTypeField> childFields =
    - joinChild.getRowType().getFieldList();
    - if (projRel != null) {
    - List<Pair<RexNode, String>> namedProjects =
    - projRel.getNamedProjects();
    - int nChildFields = childFields.size();
    - int[] adjustments = new int[nChildFields];
    - for (int i = 0; i < nChildFields; i++) {
    - adjustments[i] = adjustmentAmount;
    - }
    - for (Pair<RexNode, String> pair : namedProjects) {
    - RexNode e = pair.left;
    - if (adjustmentAmount != 0) {
    - // shift the references by the adjustment amount
    - e = e.accept(
    - new RelOptUtil.RexInputConverter(
    - rexBuilder,
    - childFields,
    - joinChildrenFields,
    - adjustments));
    - }
    - projects.add(Pair.of(e, pair.right));
    - }
    - } else {
    - // no projection; just create references to the inputs
    - for (int i = 0; i < childFields.size(); i++) {
    - final RelDataTypeField field = childFields.get(i);
    - projects.add(
    - Pair.of(
    - (RexNode) rexBuilder.makeInputRef(
    - field.getType(),
    - i + adjustmentAmount),
    - field.getName()));
    - }
    - }
    - }
      }
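    The large deletion above works because the parent JoinProjectTransposeRule constructor in Calcite 1.4 accepts the includeOuter flag directly, so the Hive subclass no longer needs its own copies of onMatch and createProjectExprs; each rule instance only chooses the flag. A toy sketch of that shape; ParentRule and ChildRule are illustrative names, not Calcite classes.

      // The parent owns the matching logic and is parameterized by a flag.
      class ParentRule {
        protected final String description;
        private final boolean includeOuter;

        protected ParentRule(String description, boolean includeOuter) {
          this.description = description;
          this.includeOuter = includeOuter;
        }

        public boolean matches(boolean isOuterJoin) {
          // outer joins only match when the flag was set at construction time
          return includeOuter || !isOuterJoin;
        }
      }

      // The subclass keeps no duplicated logic; it only picks the flag per instance.
      final class ChildRule extends ParentRule {
        static final ChildRule INNER_ONLY =
            new ChildRule("Child(Project-Project)", false);
        static final ChildRule INCLUDE_OUTER =
            new ChildRule("Child(IncludingOuter)(Project-Project)", true);

        private ChildRule(String description, boolean includeOuter) {
          super(description, includeOuter);
        }
      }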

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
    index f26d1df..73ae7c4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
    @@ -914,8 +914,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
            if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
              // 6.1. Merge join into multijoin operators (if possible)
              calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(),
    - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT,
    - HiveJoinProjectTransposeRule.LEFT_PROJECT, HiveJoinProjectTransposeRule.RIGHT_PROJECT,
    + HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER,
    + HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER,
    + HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER,
                      HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE);
              // The previous rules can pull up projections through join operators,
              // thus we run the field trimmer again to push them back down

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/constprog_partitioner.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
    index 6475fa7..e22a4a9 100644
    --- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
    +++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
    @@ -111,42 +111,42 @@ STAGE PLANS:
                    predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
                    Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
    - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
    - outputColumnNames: _col0, _col1, _col2, _col3
    + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
    + outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
    - key expressions: _col0 (type: int), _col3 (type: int)
    - sort order: ++
    - Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
                        Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: int), _col2 (type: int)
                TableScan
                  alias: li
                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                    Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
    - expressions: l_orderkey (type: int), l_linenumber (type: int)
    - outputColumnNames: _col0, _col1
    + expressions: l_orderkey (type: int)
    + outputColumnNames: _col0
                      Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
    - keys: _col0 (type: int), _col1 (type: int)
    + keys: _col0 (type: int)
                        mode: hash
    - outputColumnNames: _col0, _col1
    + outputColumnNames: _col0
                        Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
    - key expressions: _col0 (type: int), _col1 (type: int)
    - sort order: ++
    - Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Left Semi Join 0 to 1
                keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                outputColumnNames: _col1, _col2
                Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
                Select Operator

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
    index 14a6ade..90fb6f4 100644
    --- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
    +++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out
    @@ -474,17 +474,17 @@ STAGE PLANS:
                  alias: xx
                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -581,17 +581,17 @@ STAGE PLANS:
                  alias: xx
                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 20.0)) and key is not null) (type: boolean)
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  alias: x
    @@ -625,9 +625,9 @@ STAGE PLANS:
                        Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Demux Operator
    - Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
                Mux Operator
    - Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
                  Join Operator
                    condition map:
                         Left Semi Join 0 to 1
    @@ -652,7 +652,7 @@ STAGE PLANS:
                  outputColumnNames: _col0
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  Mux Operator
    - Statistics: Num rows: 69 Data size: 693 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 57 Data size: 601 Basic stats: COMPLETE Column stats: NONE
                    Join Operator
                      condition map:
                           Left Semi Join 0 to 1
    @@ -802,17 +802,17 @@ STAGE PLANS:
                  alias: xx
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -828,10 +828,10 @@ STAGE PLANS:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 33 Data size: 358 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -921,17 +921,17 @@ STAGE PLANS:
                  alias: xx
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((UDFToDouble(key) < 200.0) and (UDFToDouble(key) > 180.0)) and key is not null) (type: boolean)
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  alias: xx
    @@ -965,9 +965,9 @@ STAGE PLANS:
                        Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Demux Operator
    - Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
                Mux Operator
    - Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
                  Join Operator
                    condition map:
                         Left Semi Join 0 to 1
    @@ -992,7 +992,7 @@ STAGE PLANS:
                  outputColumnNames: _col0
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  Mux Operator
    - Statistics: Num rows: 306 Data size: 3250 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 84 Data size: 891 Basic stats: COMPLETE Column stats: NONE
                    Join Operator
                      condition map:
                           Left Semi Join 0 to 1

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
    index 665bfce..3a3a751 100644
    --- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
    +++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out
    @@ -124,13 +124,13 @@ STAGE PLANS:
                          predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
                          Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
    - outputColumnNames: _col0, _col1, _col2, _col3
    + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
    + outputColumnNames: _col0, _col1, _col2
                            Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
    - key expressions: _col0 (type: int), _col3 (type: int)
    - sort order: ++
    - Map-reduce partition columns: _col0 (type: int), _col3 (type: int)
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
                              Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                              value expressions: _col1 (type: int), _col2 (type: int)
              Map 3
    @@ -139,21 +139,21 @@ STAGE PLANS:
                        alias: li
                        Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_linenumber (type: int)
    - outputColumnNames: _col0, _col1
    + expressions: l_orderkey (type: int)
    + outputColumnNames: _col0
                            Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
    - keys: _col0 (type: int), _col1 (type: int)
    + keys: _col0 (type: int)
                              mode: hash
    - outputColumnNames: _col0, _col1
    + outputColumnNames: _col0
                              Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                              Reduce Output Operator
    - key expressions: _col0 (type: int), _col1 (type: int)
    - sort order: ++
    - Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
                                Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
              Reducer 2
                  Reduce Operator Tree:
    @@ -161,8 +161,8 @@ STAGE PLANS:
                      condition map:
                           Left Semi Join 0 to 1
                      keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                      outputColumnNames: _col1, _col2
                      Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
                      Select Operator

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
    index 28eda26..44f467b 100644
    --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
    +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
    @@ -41,17 +41,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (value is not null and key is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((value > 'val_9') and key is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col1 (type: string), _col0 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -83,10 +83,10 @@ STAGE PLANS:
                        0 _col1 (type: string), _col0 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/subquery_in.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
    index 00b3399..bfcdaa8 100644
    --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
    +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
    @@ -31,17 +31,17 @@ STAGE PLANS:
                        alias: src
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key > '9') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string)
                              sort order: +
                              Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                              value expressions: _col1 (type: string)
              Map 3
                  Map Operator Tree:
    @@ -74,10 +74,10 @@ STAGE PLANS:
                        0 _col0 (type: string)
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -149,17 +149,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string), _col1 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -191,10 +191,10 @@ STAGE PLANS:
                        0 _col0 (type: string), _col1 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -627,17 +627,17 @@ STAGE PLANS:
                        alias: b
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
                            expressions: key (type: string), value (type: string)
                            outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: string), _col1 (type: string)
                              sort order: ++
                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
              Map 3
                  Map Operator Tree:
                      TableScan
    @@ -669,10 +669,10 @@ STAGE PLANS:
                        0 _col0 (type: string), _col1 (type: string)
                        1 _col0 (type: string), _col1 (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
    index a2dd910..be39d0d 100644
    --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
    +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
    @@ -205,21 +205,21 @@ STAGE PLANS:
                        alias: lineitem
                        Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                        Filter Operator
    - predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean)
    + predicate: (((l_shipmode = 'AIR') and (l_linenumber = 1)) and l_orderkey is not null) (type: boolean)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_linenumber (type: int)
    - outputColumnNames: _col0, _col1
    + expressions: l_orderkey (type: int)
    + outputColumnNames: _col0
                            Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
    - keys: _col0 (type: int), _col1 (type: int)
    + keys: _col0 (type: int)
                              mode: hash
    - outputColumnNames: _col0, _col1
    + outputColumnNames: _col0
                              Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                              Spark HashTable Sink Operator
                                keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                  Local Work:
                    Map Reduce Local Work

    @@ -236,15 +236,15 @@ STAGE PLANS:
                          predicate: (((l_linenumber = 1) and l_orderkey is not null) and l_partkey is not null) (type: boolean)
                          Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                          Select Operator
    - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int)
    - outputColumnNames: _col0, _col1, _col2, _col3
    + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
    + outputColumnNames: _col0, _col1, _col2
                            Statistics: Num rows: 13 Data size: 1559 Basic stats: COMPLETE Column stats: NONE
                            Map Join Operator
                              condition map:
                                   Left Semi Join 0 to 1
                              keys:
    - 0 _col0 (type: int), _col3 (type: int)
    - 1 _col0 (type: int), _col1 (type: int)
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
                              outputColumnNames: _col1, _col2
                              input vertices:
                                1 Map 2

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_exists.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
    index 776b145..ace5efe 100644
    --- a/ql/src/test/results/clientpositive/subquery_exists.q.out
    +++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
    @@ -36,17 +36,17 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (value is not null and key is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((value > 'val_9') and key is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: string), _col0 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    @@ -75,10 +75,10 @@ STAGE PLANS:
                  0 _col1 (type: string), _col0 (type: string)
                  1 _col0 (type: string), _col1 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/subquery_in.q.out b/ql/src/test/results/clientpositive/subquery_in.q.out
    index 7c53638..f82c799 100644
    --- a/ql/src/test/results/clientpositive/subquery_in.q.out
    +++ b/ql/src/test/results/clientpositive/subquery_in.q.out
    @@ -26,17 +26,17 @@ STAGE PLANS:
                  alias: src
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key > '9') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  alias: src
    @@ -66,10 +66,10 @@ STAGE PLANS:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -136,17 +136,17 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    @@ -175,10 +175,10 @@ STAGE PLANS:
                  0 _col0 (type: string), _col1 (type: string)
                  1 _col0 (type: string), _col1 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -671,17 +671,17 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  Reduce Output Operator
                    key expressions: _col0 (type: string), _col1 (type: string)
    @@ -696,10 +696,10 @@ STAGE PLANS:
                  0 _col0 (type: string), _col1 (type: string)
                  1 _col0 (type: string), _col1 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_in_having.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
    index 8cabfa7..da1da06 100644
    --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
    +++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
    @@ -782,17 +782,17 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key > '8') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: string)
                TableScan
                  alias: b
    @@ -822,13 +822,13 @@ STAGE PLANS:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count()
                  keys: _col0 (type: string), _col1 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    table:
    @@ -844,7 +844,7 @@ STAGE PLANS:
                    key expressions: _col0 (type: string), _col1 (type: string)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
            Reduce Operator Tree:
              Group By Operator
    @@ -852,10 +852,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
                  predicate: _col2 is not null (type: boolean)
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    table:
    @@ -871,7 +871,7 @@ STAGE PLANS:
                    key expressions: _col2 (type: bigint)
                    sort order: +
                    Map-reduce partition columns: _col2 (type: bigint)
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: string), _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -887,10 +887,10 @@ STAGE PLANS:
                  0 _col2 (type: bigint)
                  1 _col0 (type: bigint)
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -1079,10 +1079,10 @@ STAGE PLANS:
                      0 _col2 (type: bigint)
                      1 _col0 (type: bigint)
                    outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.TextInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -1098,7 +1098,7 @@ STAGE PLANS:
                    key expressions: _col2 (type: bigint)
                    sort order: +
                    Map-reduce partition columns: _col2 (type: bigint)
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: string), _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -1114,10 +1114,10 @@ STAGE PLANS:
                  0 _col2 (type: bigint)
                  1 _col0 (type: bigint)
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -1158,12 +1158,12 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key > '8') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Semi Join 0 to 1
    @@ -1171,18 +1171,18 @@ STAGE PLANS:
                          0 _col0 (type: string)
                          1 _col0 (type: string)
                        outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          aggregations: count()
                          keys: _col0 (type: string), _col1 (type: string)
                          mode: hash
                          outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                          Reduce Output Operator
                            key expressions: _col0 (type: string), _col1 (type: string)
                            sort order: ++
                            Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                            value expressions: _col2 (type: bigint)
            Local Work:
              Map Reduce Local Work
    @@ -1192,10 +1192,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
                  predicate: _col2 is not null (type: boolean)
    - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    table:

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
    index 5d6d4a8..76d7503 100644
    --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
    +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
    @@ -52,7 +52,7 @@ STAGE PLANS:
                  alias: src11
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                  Filter Operator
    - predicate: (key1 is not null and value1 is not null) (type: boolean)
    + predicate: ((key1 > '9') and value1 is not null) (type: boolean)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
                      expressions: key1 (type: string), value1 (type: string)
    @@ -122,17 +122,17 @@ STAGE PLANS:
                  alias: a
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  alias: a
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    @@ -161,10 +161,10 @@ STAGE PLANS:
                  0 _col0 (type: string), _col1 (type: string)
                  1 _col0 (type: string), _col1 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -588,17 +588,17 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: (key is not null and value is not null) (type: boolean)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + predicate: ((key > '9') and value is not null) (type: boolean)
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  Reduce Output Operator
                    key expressions: _col0 (type: string), _col1 (type: string)
    @@ -613,10 +613,10 @@ STAGE PLANS:
                  0 _col0 (type: string), _col1 (type: string)
                  1 _col0 (type: string), _col1 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/c40382d4/ql/src/test/results/clientpositive/subquery_views.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
    index 41834a3..c59d86e 100644
    --- a/ql/src/test/results/clientpositive/subquery_views.q.out
    +++ b/ql/src/test/results/clientpositive/subquery_views.q.out
    @@ -70,7 +70,7 @@ POSTHOOK: Input: default@src
      POSTHOOK: Output: database:default
      POSTHOOK: Output: default@cv2
      Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
    -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
    +Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
      PREHOOK: query: explain
      select *
      from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
    @@ -97,7 +97,7 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
    + predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    @@ -135,15 +135,15 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: key is not null (type: boolean)
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + predicate: (key < '11') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string)
                      outputColumnNames: _col0, _col1
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order:
    - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string), _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -157,7 +157,7 @@ STAGE PLANS:
                  0
                  1
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  table:
    @@ -173,22 +173,22 @@ STAGE PLANS:
                    key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string)
                    sort order: +++
                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string)
    - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
                TableScan
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: ((value > 'val_11') and key is not null) (type: boolean)
    - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
    + predicate: (((value > 'val_11') and (key < '11')) and key is not null) (type: boolean)
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: key (type: string), value (type: string), key (type: string)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                        sort order: +++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
    - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Join Operator
                condition map:
    @@ -197,14 +197,14 @@ STAGE PLANS:
                  0 _col0 (type: string), _col1 (type: string), _col0 (type: string)
                  1 _col0 (type: string), _col1 (type: string), _col2 (type: string)
                outputColumnNames: _col0, _col1, _col3
    - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
                  predicate: _col3 is null (type: boolean)
    - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: string), _col1 (type: string)
                    outputColumnNames: _col0, _col1
    - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
                      table:
    @@ -220,7 +220,7 @@ STAGE PLANS:
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
    - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
                TableScan
                  Reduce Output Operator
    @@ -236,10 +236,10 @@ STAGE PLANS:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
                outputColumnNames: _col0, _col1
    - Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 166 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 110 Data size: 1172 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -252,7 +252,7 @@ STAGE PLANS:
                  alias: b
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
    - predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
    + predicate: ((value > 'val_11') and (key is null or value is null or key is null)) (type: boolean)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    @@ -379,7 +379,7 @@ STAGE PLANS:
              ListSink

      Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
    -Warning: Shuffle Join JOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
    +Warning: Shuffle Join JOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-6:MAPRED' is a cross product
      PREHOOK: query: select *
      from cv2 where cv2.key in (select key from cv2 c where c.key < '11')
      PREHOOK: type: QUERY
  • Xuf at Sep 9, 2015 at 7:08 am
    HIVE-11712: Duplicate groupby keys cause ClassCastException (Jimmy, reviewed by Xuefu)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb4f5e70
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb4f5e70
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb4f5e70

    Branch: refs/heads/beeline-cli
    Commit: bb4f5e702b11720ca54e43ba4a6c3aff099b0f4c
    Parents: c40382d
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Tue Sep 1 11:48:36 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Thu Sep 3 09:57:23 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +
      .../queries/clientpositive/groupby1_map_nomap.q | 2 +
      ql/src/test/queries/clientpositive/groupby6.q | 2 +
      .../clientpositive/groupby_grouping_id2.q | 2 +
      .../clientpositive/groupby_ppr_multi_distinct.q | 2 +
      ql/src/test/queries/clientpositive/having2.q | 27 +
      .../clientpositive/groupby1_map_nomap.q.out | 8 +-
      .../test/results/clientpositive/groupby6.q.out | 8 +-
      .../clientpositive/groupby_duplicate_key.q.out | 16 +-
      .../clientpositive/groupby_grouping_id2.q.out | 28 +-
      .../groupby_ppr_multi_distinct.q.out | 8 +-
      .../test/results/clientpositive/having2.q.out | 353 ++++++++++++
      .../spark/groupby1_map_nomap.q.out | 564 ++++++++++---------
      .../results/clientpositive/spark/groupby6.q.out | 20 +-
      .../spark/groupby_grouping_id2.q.out | 38 +-
      .../spark/groupby_ppr_multi_distinct.q.out | 16 +-
      16 files changed, 761 insertions(+), 337 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    index b809a23..778c7b2 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    @@ -4620,6 +4620,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
                groupByInputRowResolver);

    + if (ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
    + // Skip duplicated grouping keys
    + continue;
    + }
            groupByKeys.add(grpByExprNode);
            String field = getColumnInternalName(i);
            outputColumnNames.add(field);

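    The hunk above is the whole fix for HIVE-11712: before a grouping expression is appended to groupByKeys, ExprNodeDescUtils.indexOf searches the keys collected so far, and a duplicated key is skipped, so a query such as GROUP BY c, c ends up with a single group-by key instead of two. Below is a minimal, self-contained sketch of that dedup idea; the Key class and the local indexOf helper are simplified stand-ins for Hive's ExprNodeDesc / ExprNodeDescUtils machinery (not the real classes), kept only to show the skip-on-duplicate control flow used in the patch.

      import java.util.ArrayList;
      import java.util.Arrays;
      import java.util.List;

      public final class GroupByKeyDedupSketch {

        /** Stand-in for ExprNodeDesc: just a column name, compared structurally. */
        static final class Key {
          final String column;
          Key(String column) { this.column = column; }
          boolean isSame(Key other) {                 // plays the role of ExprNodeDesc.isSame
            return other != null && column.equals(other.column);
          }
          @Override public String toString() { return column; }
        }

        /** Stand-in for ExprNodeDescUtils.indexOf: linear search using isSame. */
        static int indexOf(Key src, List<Key> dest) {
          for (int i = 0; i < dest.size(); i++) {
            if (src.isSame(dest.get(i))) {
              return i;
            }
          }
          return -1;
        }

        public static void main(String[] args) {
          // GROUP BY customer_name, customer_name -> the second, duplicated key is skipped.
          List<Key> groupByKeys = new ArrayList<>();
          for (Key k : Arrays.asList(new Key("customer_name"), new Key("customer_name"))) {
            if (indexOf(k, groupByKeys) >= 0) {
              continue;                               // duplicated grouping key: skip it
            }
            groupByKeys.add(k);
          }
          System.out.println(groupByKeys);            // prints [customer_name]
        }
      }

    With the duplicate dropped at plan-build time, the updated q.out files below show one grouping key where the old plans carried a repeated one (for example, keys: key (type: string), '' (type: string) instead of three keys in groupby_duplicate_key.q.out).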
    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
    index eb09a9c..b22a61e 100644
    --- a/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
    +++ b/ql/src/test/queries/clientpositive/groupby1_map_nomap.q
    @@ -2,6 +2,8 @@ set hive.map.aggr=true;
      set hive.groupby.skewindata=false;
      set hive.groupby.mapaggr.checkinterval=20;

    +-- SORT_QUERY_RESULTS
    +
      CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE;

      EXPLAIN

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby6.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/groupby6.q b/ql/src/test/queries/clientpositive/groupby6.q
    index 3a3cc58..17597cb 100755
    --- a/ql/src/test/queries/clientpositive/groupby6.q
    +++ b/ql/src/test/queries/clientpositive/groupby6.q
    @@ -1,6 +1,8 @@
      set hive.map.aggr=false;
      set hive.groupby.skewindata=true;

    +-- SORT_QUERY_RESULTS
    +
      CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE;

      EXPLAIN

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
    index f451f17..5c05aad 100644
    --- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
    +++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
    @@ -4,6 +4,8 @@ LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;

      set hive.groupby.skewindata = true;

    +-- SORT_QUERY_RESULTS
    +
      SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;

      SELECT GROUPING__ID, count(*)

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
    index 20c73bd..1249853 100644
    --- a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
    +++ b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
    @@ -1,6 +1,8 @@
      set hive.map.aggr=false;
      set hive.groupby.skewindata=false;

    +-- SORT_QUERY_RESULTS
    +
      CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;

      EXPLAIN EXTENDED

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/queries/clientpositive/having2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/having2.q b/ql/src/test/queries/clientpositive/having2.q
    index 282b2c0..83ae1e1 100644
    --- a/ql/src/test/queries/clientpositive/having2.q
    +++ b/ql/src/test/queries/clientpositive/having2.q
    @@ -63,3 +63,30 @@ SELECT customer_name, SUM(customer_balance), SUM(order_quantity) FROM default.te
      (SUM(customer_balance) <= 4074689.000000041)
      AND (COUNT(s1.discount) <= 822)
      );
    +
    +explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +);
    +
    +explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name, s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +);
    +
    +explain
    +SELECT distinct s1.customer_name as x, s1.customer_name as y
    +FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +);

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
    index cc985a5..7cdf240 100644
    --- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
    +++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out
    @@ -1,8 +1,12 @@
    -PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
      PREHOOK: type: CREATETABLE
      PREHOOK: Output: database:default
      PREHOOK: Output: default@dest1
    -POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
      POSTHOOK: type: CREATETABLE
      POSTHOOK: Output: database:default
      POSTHOOK: Output: default@dest1

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby6.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
    index b790224..d8cb2ac 100644
    --- a/ql/src/test/results/clientpositive/groupby6.q.out
    +++ b/ql/src/test/results/clientpositive/groupby6.q.out
    @@ -1,8 +1,12 @@
    -PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
      PREHOOK: type: CREATETABLE
      PREHOOK: Output: database:default
      PREHOOK: Output: default@dest1
    -POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE
      POSTHOOK: type: CREATETABLE
      POSTHOOK: Output: database:default
      POSTHOOK: Output: default@dest1

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
    index 2f2a6e6..fc95f41 100644
    --- a/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
    +++ b/ql/src/test/results/clientpositive/groupby_duplicate_key.q.out
    @@ -21,14 +21,14 @@ STAGE PLANS:
                    outputColumnNames: key
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
    - keys: key (type: string), '' (type: string), '' (type: string)
    + keys: key (type: string), '' (type: string)
                      mode: hash
    - outputColumnNames: _col0, _col1, _col2
    + outputColumnNames: _col0, _col1
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
    - key expressions: _col0 (type: string), _col2 (type: string)
    + key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
    - Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
    + Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Group By Operator
    @@ -99,14 +99,14 @@ STAGE PLANS:
                    outputColumnNames: key
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
    - keys: key (type: string), 'X' (type: string), 'X' (type: string)
    + keys: key (type: string), 'X' (type: string)
                      mode: hash
    - outputColumnNames: _col0, _col1, _col2
    + outputColumnNames: _col0, _col1
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
    - key expressions: _col0 (type: string), _col2 (type: string)
    + key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
    - Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
    + Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Operator Tree:
              Group By Operator

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
    index 4a0a9d2..544a7ae 100644
    --- a/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
    +++ b/ql/src/test/results/clientpositive/groupby_grouping_id2.q.out
    @@ -14,25 +14,29 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt
      POSTHOOK: type: LOAD
      #### A masked pattern was here ####
      POSTHOOK: Output: default@t1
    -PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
      PREHOOK: type: QUERY
      PREHOOK: Input: default@t1
      #### A masked pattern was here ####
    -POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
      POSTHOOK: type: QUERY
      POSTHOOK: Input: default@t1
      #### A masked pattern was here ####
    -NULL NULL 0 6
    +1 1 3 1
      1 NULL 1 2
      1 NULL 3 1
    -1 1 3 1
    -2 NULL 1 1
      2 2 3 1
    +2 NULL 1 1
    +3 3 3 1
      3 NULL 1 2
      3 NULL 3 1
    -3 3 3 1
    -4 NULL 1 1
      4 5 3 1
    +4 NULL 1 1
    +NULL NULL 0 6
      PREHOOK: query: SELECT GROUPING__ID, count(*)
      FROM
      (
    @@ -129,17 +133,17 @@ POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key,
      POSTHOOK: type: QUERY
      POSTHOOK: Input: default@t1
      #### A masked pattern was here ####
    -NULL NULL 0 6
    +1 1 3 1
      1 NULL 1 2
      1 NULL 3 1
    -1 1 3 1
    -2 NULL 1 1
      2 2 3 1
    +2 NULL 1 1
    +3 3 3 1
      3 NULL 1 2
      3 NULL 3 1
    -3 3 3 1
    -4 NULL 1 1
      4 5 3 1
    +4 NULL 1 1
    +NULL NULL 0 6
      PREHOOK: query: SELECT GROUPING__ID, count(*)
      FROM
      (

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
    index c50abde..6eb3f66 100644
    --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
    +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
    @@ -1,8 +1,12 @@
    -PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
      PREHOOK: type: CREATETABLE
      PREHOOK: Output: database:default
      PREHOOK: Output: default@dest1
    -POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
      POSTHOOK: type: CREATETABLE
      POSTHOOK: Output: database:default
      POSTHOOK: Output: default@dest1

    http://git-wip-us.apache.org/repos/asf/hive/blob/bb4f5e70/ql/src/test/results/clientpositive/having2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out
    index aafd3b6..ba601f9 100644
    --- a/ql/src/test/results/clientpositive/having2.q.out
    +++ b/ql/src/test/results/clientpositive/having2.q.out
    @@ -242,3 +242,356 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: s1
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Filter Operator
    + predicate: customer_name is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Reduce Output Operator
    + key expressions: customer_name (type: string)
    + sort order: +
    + Map-reduce partition columns: customer_name (type: string)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + value expressions: discount (type: double), customer_balance (type: double)
    + TableScan
    + alias: s2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + value expressions: value (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 customer_name (type: string)
    + 1 key (type: string)
    + outputColumnNames: _col6, _col18, _col21, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
    + outputColumnNames: _col18, _col21, _col6, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col21), avg(_col6), count(_col54)
    + keys: _col18 (type: string)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col0 (type: string)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: string)
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
    + keys: KEY._col0 (type: string)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((_col1 <= 4074689.000000041) and (_col2 <= 822.0)) and (_col3 > 4)) (type: boolean)
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name, s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT s1.customer_name FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +GROUP BY s1.customer_name, s1.customer_name
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: s1
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Filter Operator
    + predicate: customer_name is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Reduce Output Operator
    + key expressions: customer_name (type: string)
    + sort order: +
    + Map-reduce partition columns: customer_name (type: string)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + value expressions: discount (type: double), customer_balance (type: double)
    + TableScan
    + alias: s2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + value expressions: value (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 customer_name (type: string)
    + 1 key (type: string)
    + outputColumnNames: _col6, _col18, _col21, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
    + outputColumnNames: _col18, _col21, _col6, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col21), avg(_col6), count(_col54)
    + keys: _col18 (type: string)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col0 (type: string)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: string)
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
    + keys: KEY._col0 (type: string), KEY._col0 (type: string)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4
    + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
    + outputColumnNames: _col1, _col2, _col3, _col4
    + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: string)
    + outputColumnNames: _col0
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +SELECT distinct s1.customer_name as x, s1.customer_name as y
    +FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +SELECT distinct s1.customer_name as x, s1.customer_name as y
    +FROM default.testv1_staples s1 join default.src s2 on s1.customer_name = s2.key
    +HAVING (
    +(SUM(s1.customer_balance) <= 4074689.000000041)
    +AND (AVG(s1.discount) <= 822)
    +AND (COUNT(s2.value) > 4)
    +)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: s1
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Filter Operator
    + predicate: customer_name is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + Reduce Output Operator
    + key expressions: customer_name (type: string)
    + sort order: +
    + Map-reduce partition columns: customer_name (type: string)
    + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
    + value expressions: discount (type: double), customer_balance (type: double)
    + TableScan
    + alias: s2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + value expressions: value (type: string)
    + Reduce Operator Tree:
    + Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 customer_name (type: string)
    + 1 key (type: string)
    + outputColumnNames: _col6, _col18, _col21, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col18 (type: string), _col21 (type: double), _col6 (type: double), _col54 (type: string)
    + outputColumnNames: _col18, _col21, _col6, _col54
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(_col21), avg(_col6), count(_col54)
    + keys: _col18 (type: string)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col0 (type: string)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: string)
    + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
    + keys: KEY._col0 (type: string), KEY._col0 (type: string)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4
    + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
    + outputColumnNames: _col1, _col2, _col3, _col4
    + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((_col2 <= 4074689.000000041) and (_col3 <= 822.0)) and (_col4 > 4)) (type: boolean)
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: string), _col1 (type: string)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 5 Data size: 53 Basic stats: COMPLETE Column stats: NONE
    +