Repository: hive
Updated Branches:
   refs/heads/master 392145812 -> a4c43f033


HIVE-11945: ORC with non-local reads may not be reusing connection to DN (Rajesh Balamohan reviewed by Sergey Shelukhin, Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4c43f03
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4c43f03
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4c43f03

Branch: refs/heads/master
Commit: a4c43f0335b33a75d2e9f3dc53b3cd33f8f115cf
Parents: 3921458
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Mon Sep 28 20:10:50 2015 -0500
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Mon Sep 28 20:10:50 2015 -0500

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/io/orc/MetadataReader.java | 6 ++----
  .../org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +++++-------
  .../apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java | 5 +++--
  3 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
index cdc0372..43d2933 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
@@ -82,8 +82,7 @@ public class MetadataReader {
          }
          if ((included == null || included[col]) && indexes[col] == null) {
            byte[] buffer = new byte[len];
- file.seek(offset);
- file.readFully(buffer);
+ file.readFully(offset, buffer, 0, buffer.length);
            ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
            indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
                bb, new long[]{0}, stream.getLength(), codec, bufferSize));
@@ -108,8 +107,7 @@ public class MetadataReader {

      // read the footer
      ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
- file.seek(offset);
- file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
+ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
      return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
          Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
          tailLength, codec, bufferSize));
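
For context (not part of the patch): the hunks above replace a stateful seek() followed by a relative readFully() with Hadoop's positional readFully(position, buf, off, len), which, per the JIRA title, should let HDFS keep reusing its DataNode connection for non-local reads instead of re-establishing it. A minimal sketch of the two call patterns on FSDataInputStream, with a hypothetical path, offset, and buffer size:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionalReadSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path path = new Path("/tmp/example.orc"); // hypothetical file
    long offset = 1024L;                      // hypothetical byte offset
    byte[] buffer = new byte[256];

    try (FSDataInputStream file = fs.open(path)) {
      // Old pattern: move the stream, then read relative to its position.
      file.seek(offset);
      file.readFully(buffer, 0, buffer.length);

      // Pattern used by the patch: positional read at an absolute offset;
      // the stream's own position is left untouched.
      file.readFully(offset, buffer, 0, buffer.length);
    }
  }
}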

http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index ab539c4..23b3b55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -244,9 +244,8 @@ public class ReaderImpl implements Reader {
      if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
        // If it isn't there, this may be the 0.11.0 version of ORC.
        // Read the first 3 bytes of the file to check for the header
- in.seek(0);
        byte[] header = new byte[len];
- in.readFully(header, 0, len);
+ in.readFully(0, header, 0, len);
        // if it isn't there, this isn't an ORC file
        if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
          throw new FileFormatException("Malformed ORC file " + path +
@@ -472,10 +471,10 @@ public class ReaderImpl implements Reader {

      //read last bytes into buffer to get PostScript
      int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
- file.seek(size - readSize);
      ByteBuffer buffer = ByteBuffer.allocate(readSize);
      assert buffer.position() == 0;
- file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
+ file.readFully((size - readSize),
+ buffer.array(), buffer.arrayOffset(), readSize);
      buffer.position(0);

      //read the PostScript
@@ -495,10 +494,9 @@ public class ReaderImpl implements Reader {
      int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
      if (extra > 0) {
        //more bytes need to be read, seek back to the right place and read extra bytes
- file.seek(size - readSize - extra);
        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
- file.readFully(extraBuf.array(),
- extraBuf.arrayOffset() + extraBuf.position(), extra);
+ file.readFully((size - readSize - extra), extraBuf.array(),
+ extraBuf.arrayOffset() + extraBuf.position(), extra);
        extraBuf.position(extra);
        //append with already read bytes
        extraBuf.put(buffer);

http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
index ded3979..9c9a1c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
@@ -246,8 +246,8 @@ public class RecordReaderUtils {
        }
        int len = (int) (range.getEnd() - range.getOffset());
        long off = range.getOffset();
- file.seek(base + off);
        if (zcr != null) {
+ file.seek(base + off);
          boolean hasReplaced = false;
          while (len > 0) {
            ByteBuffer partial = zcr.readBuffer(len, false);
@@ -264,12 +264,13 @@ public class RecordReaderUtils {
            off += read;
          }
        } else if (doForceDirect) {
+ file.seek(base + off);
          ByteBuffer directBuf = ByteBuffer.allocateDirect(len);
          readDirect(file, len, directBuf);
          range = range.replaceSelfWith(new BufferChunk(directBuf, range.getOffset()));
        } else {
          byte[] buffer = new byte[len];
- file.readFully(buffer, 0, buffer.length);
+ file.readFully((base + off), buffer, 0, buffer.length);
          range = range.replaceSelfWith(new BufferChunk(ByteBuffer.wrap(buffer), range.getOffset()));
        }
        range = range.next;


  • Pxiong at Sep 30, 2015 at 12:56 am
    Repository: hive
    Updated Branches:
       refs/heads/master 6a8d7e4cd -> cdaf35674


    HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567

    Branch: refs/heads/master
    Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
    Parents: 6a8d7e4
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Tue Sep 29 17:47:39 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Tue Sep 29 17:47:39 2015 -0700

    ----------------------------------------------------------------------
      .../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
      .../clientpositive/metadata_only_queries.q | 15 ++
      .../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
      .../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
      .../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
      5 files changed, 552 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    index bc8d8f7..5a21e6b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;

      import java.util.ArrayList;
      import java.util.Collection;
    +import java.util.HashMap;
    +import java.util.HashSet;
      import java.util.LinkedHashMap;
      import java.util.List;
      import java.util.Map;
    @@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
                return null;
              }
              Operator<?> last = (Operator<?>) stack.get(5);
    + SelectOperator cselOp = null;
    + Map<Integer,Object> posToConstant = new HashMap<>();
              if (last instanceof SelectOperator) {
    - SelectOperator cselOp = (SelectOperator) last;
    + cselOp = (SelectOperator) last;
                if (!cselOp.isIdentitySelect()) {
    - return null; // todo we can do further by providing operator to fetch task
    + for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
    + ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
    + if (desc instanceof ExprNodeConstantDesc) {
    + //We store the position to the constant value for later use.
    + posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
    + } else {
    + if (!(desc instanceof ExprNodeColumnDesc)) {
    + // Probably an expression, cant handle that
    + return null;
    + }
    + }
    + }
                }
                last = (Operator<?>) stack.get(6);
              }
    @@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {


              List<List<Object>> allRows = new ArrayList<List<Object>>();
    - allRows.add(oneRow);
    -
              List<String> colNames = new ArrayList<String>();
              List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    - for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
    - colNames.add(colInfo.getInternalName());
    - ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + if (cselOp == null) {
    + allRows.add(oneRow);
    + for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
    + colNames.add(colInfo.getInternalName());
    + ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + }
    + } else {
    + int aggrPos = 0;
    + List<Object> oneRowWithConstant = new ArrayList<>();
    + for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
    + if (posToConstant.containsKey(pos)) {
    + // This position is a constant.
    + oneRowWithConstant.add(posToConstant.get(pos));
    + } else {
    + // This position is an aggregation.
    + oneRowWithConstant.add(oneRow.get(aggrPos++));
    + }
    + ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
    + colNames.add(colInfo.getInternalName());
    + ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + }
    + allRows.add(oneRowWithConstant);
              }
              StandardStructObjectInspector sOI = ObjectInspectorFactory.
                  getStandardStructObjectInspector(colNames, ois);
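
    As a reading aid for the hunk above, here is a hedged standalone sketch (hypothetical class and values, not Hive code) of the row-rebuilding idea: constants remembered by their select-list position are interleaved, in output order, with the aggregate values answered from metastore statistics.

      import java.util.ArrayList;
      import java.util.HashMap;
      import java.util.List;
      import java.util.Map;

      public class ConstantRowSketch {
        static List<Object> buildRow(Map<Integer, Object> posToConstant,
                                     List<Object> aggregates, int width) {
          List<Object> row = new ArrayList<>(width);
          int aggrPos = 0;
          for (int pos = 0; pos < width; pos++) {
            if (posToConstant.containsKey(pos)) {
              row.add(posToConstant.get(pos));    // constant column, e.g. '1' as one
            } else {
              row.add(aggregates.get(aggrPos++)); // next aggregation result
            }
          }
          return row;
        }

        public static void main(String[] args) {
          Map<Integer, Object> constants = new HashMap<>();
          constants.put(1, "1");       // e.g. select count(*), '1' as one, max(i) ...
          List<Object> aggs = new ArrayList<>();
          aggs.add(9999L);             // count(*) from stats
          aggs.add(65791);             // max(i) from stats
          System.out.println(buildRow(constants, aggs, 3)); // [9999, 1, 65791]
        }
      }

    Running it prints [9999, 1, 65791], mirroring how the constant '1' lands between count(*) and max(i) in the answered row.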

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
    index 56f3a78..70fac92 100644
    --- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
    +++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
    @@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
      explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;

    +explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
    +explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
    +
      analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
      analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
      analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
    @@ -69,6 +74,12 @@ explain
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;

    +explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
    +
    +
    +
      explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
    @@ -76,6 +87,10 @@ explain
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;

    +explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
    +
      explain select count(ts) from stats_tbl_part;

      drop table stats_tbl;

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    index 2dcd437..65a4dfa 100644
    --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    @@ -276,6 +276,114 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -364,6 +472,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -414,6 +547,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    index b2221fc..0d85f4e 100644
    --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    @@ -288,6 +288,126 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 2 <- Map 1 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 2 <- Map 1 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    index f43440e..ab86ab0 100644
    --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    @@ -288,6 +288,126 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part
  • Jxiang at Sep 30, 2015 at 3:15 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 1636292ad -> 46e03c44f


    HIVE-11984: Add HS2 open operation metrics (Jimmy, reviewed by Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/46e03c44
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/46e03c44
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/46e03c44

    Branch: refs/heads/branch-1
    Commit: 46e03c44f40cfc6adbfecc6e4f1cd3328738986d
    Parents: 1636292
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Mon Sep 28 15:24:44 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Wed Sep 30 08:08:37 2015 -0700

    ----------------------------------------------------------------------
      .../hive/common/metrics/common/MetricsConstant.java | 1 +
      .../apache/hive/service/cli/operation/Operation.java | 13 ++++++++++++-
      .../hive/service/cli/operation/OperationManager.java | 11 +++++++++++
      3 files changed, 24 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/46e03c44/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    index d1ebe12..13c3cf9 100644
    --- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    @@ -27,6 +27,7 @@ public class MetricsConstant {
        public static String JVM_EXTRA_SLEEP = "jvm.pause.extraSleepTime";

        public static String OPEN_CONNECTIONS = "open_connections";
    + public static String OPEN_OPERATIONS = "open_operations";

        public static String JDO_ACTIVE_TRANSACTIONS = "active_jdo_transactions";
        public static String JDO_ROLLBACK_TRANSACTIONS = "rollbacked_jdo_transactions";

    http://git-wip-us.apache.org/repos/asf/hive/blob/46e03c44/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    index 19153b6..0ab38c9 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    @@ -25,6 +25,9 @@ import java.util.concurrent.TimeUnit;

      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.hive.common.metrics.common.Metrics;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
      import org.apache.hadoop.hive.ql.session.OperationLog;
    @@ -251,9 +254,17 @@ public abstract class Operation {
         */
        protected abstract void runInternal() throws HiveSQLException;

    - public void run() throws HiveSQLException {
    + public final void run() throws HiveSQLException {
          beforeRun();
          try {
    + Metrics metrics = MetricsFactory.getInstance();
    + if (metrics != null) {
    + try {
    + metrics.incrementCounter(MetricsConstant.OPEN_OPERATIONS);
    + } catch (Exception e) {
    + LOG.warn("Error Reporting open operation to Metrics system", e);
    + }
    + }
            runInternal();
          } finally {
            afterRun();

    http://git-wip-us.apache.org/repos/asf/hive/blob/46e03c44/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    index 9b0a519..5bc9fef 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    @@ -26,6 +26,9 @@ import java.util.Map;

      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.hive.common.metrics.common.Metrics;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
    @@ -202,6 +205,14 @@ public class OperationManager extends AbstractService {
          if (operation == null) {
            throw new HiveSQLException("Operation does not exist!");
          }
    + Metrics metrics = MetricsFactory.getInstance();
    + if (metrics != null) {
    + try {
    + metrics.decrementCounter(MetricsConstant.OPEN_OPERATIONS);
    + } catch (Exception e) {
    + LOG.warn("Error Reporting close operation to Metrics system", e);
    + }
    + }
          operation.close();
        }
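
    In rough terms, the two hunks bracket an operation's lifetime: the counter goes up when Operation.run() starts and comes back down when OperationManager closes the operation, so the metric tracks currently open operations. A minimal stand-in sketch using an AtomicLong instead of Hive's Metrics interface (hypothetical class, for illustration only):

      import java.util.concurrent.atomic.AtomicLong;

      public class OpenOperationsGauge {
        private final AtomicLong openOperations = new AtomicLong();

        public void operationStarted() {
          // Mirrors metrics.incrementCounter(MetricsConstant.OPEN_OPERATIONS) in Operation.run().
          openOperations.incrementAndGet();
        }

        public void operationClosed() {
          // Mirrors metrics.decrementCounter(...) in OperationManager.closeOperation().
          openOperations.decrementAndGet();
        }

        public long current() {
          return openOperations.get();
        }

        public static void main(String[] args) {
          OpenOperationsGauge gauge = new OpenOperationsGauge();
          gauge.operationStarted();
          gauge.operationStarted();
          gauge.operationClosed();
          System.out.println("open operations: " + gauge.current()); // 1
        }
      }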
  • Jxiang at Sep 30, 2015 at 5:41 pm
    Repository: hive
    Updated Branches:
       refs/heads/master e9b4d7e4e -> 265e42c5f


    HIVE-11984: Add HS2 open operation metrics (addendum) (Jimmy, reviewed by Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/265e42c5
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/265e42c5
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/265e42c5

    Branch: refs/heads/master
    Commit: 265e42c5f45f7c160861132bbfa6ab78a85830e9
    Parents: e9b4d7e
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Wed Sep 30 10:39:08 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Wed Sep 30 10:40:46 2015 -0700

    ----------------------------------------------------------------------
      .../src/java/org/apache/hive/service/cli/operation/Operation.java | 2 +-
      1 file changed, 1 insertion(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/265e42c5/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    index 0ab38c9..515299c 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    @@ -254,7 +254,7 @@ public abstract class Operation {
         */
        protected abstract void runInternal() throws HiveSQLException;

    - public final void run() throws HiveSQLException {
    + public void run() throws HiveSQLException {
          beforeRun();
          try {
            Metrics metrics = MetricsFactory.getInstance();
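
    The addendum only removes the final modifier the original patch had added to run(), making the method overridable again. A tiny illustration of the language rule involved, using hypothetical classes rather than Hive's Operation:

      abstract class BaseOperation {
        // If this method were declared final, the override in CustomOperation
        // below would not compile.
        public void run() {
          System.out.println("increment open-operations counter, then runInternal()");
          runInternal();
        }

        protected abstract void runInternal();
      }

      class CustomOperation extends BaseOperation {
        @Override
        public void run() {   // legal again once "final" is removed
          super.run();        // still passes through the metrics bookkeeping
        }

        @Override
        protected void runInternal() {
          System.out.println("doing the actual work");
        }
      }

      public class FinalRemovalSketch {
        public static void main(String[] args) {
          new CustomOperation().run();
        }
      }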
  • Jxiang at Sep 30, 2015 at 5:46 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 9761f28ae -> 8a27ae3df


    HIVE-11984: Add HS2 open operation metrics (addendum) (Jimmy, reviewed by Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a27ae3d
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a27ae3d
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a27ae3d

    Branch: refs/heads/branch-1
    Commit: 8a27ae3dff55a3ef8b7688d175d2476fb14b0064
    Parents: 9761f28
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Wed Sep 30 10:39:08 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Wed Sep 30 10:41:37 2015 -0700

    ----------------------------------------------------------------------
      .../src/java/org/apache/hive/service/cli/operation/Operation.java | 2 +-
      1 file changed, 1 insertion(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/8a27ae3d/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    index 0ab38c9..515299c 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    @@ -254,7 +254,7 @@ public abstract class Operation {
         */
        protected abstract void runInternal() throws HiveSQLException;

    - public final void run() throws HiveSQLException {
    + public void run() throws HiveSQLException {
          beforeRun();
          try {
            Metrics metrics = MetricsFactory.getInstance();
  • Sershe at Sep 30, 2015 at 6:37 pm
    Repository: hive
    Updated Branches:
       refs/heads/master 8c8cc19fd -> 064e37c46


    HIVE-11823 : create a self-contained translation for SARG to be used by metastore (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/064e37c4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/064e37c4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/064e37c4

    Branch: refs/heads/master
    Commit: 064e37c460d1c464431f740e480a6f08353d69e6
    Parents: 8c8cc19
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Wed Sep 30 11:35:36 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Wed Sep 30 11:35:47 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 49 ++++++++++++++++++++
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 35 ++++++++++++++
      2 files changed, 84 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    index 52e1b06..c45b6e6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
      import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
    +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
      import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
      import org.apache.hadoop.hive.serde2.SerDeStats;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    @@ -254,6 +255,40 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          }
        }

    + /**
    + * Modifies the SARG, replacing column names with column indexes in the target table schema. This
    + * basically does the same thing as all the shenanigans with included columns, except for the
    + * last step where ORC gets direct subtypes of the root column and uses the ordered match to map
    + * table columns to file columns. The numbers put into the predicate leaf should allow going into
    + * said subtypes directly by index to get the proper index in the file.
    + * This won't work with schema evolution, although it would probably be much easier to reason about
    + * if schema evolution were to be supported, because this is a clear boundary between table
    + * schema columns and all things ORC. None of the ORC stuff is used here and none of the
    + * table schema stuff is used after that - ORC doesn't need a bunch of extra crap to apply
    + * the SARG thus modified.
    + */
    + public static void translateSargToTableColIndexes(
    + SearchArgument sarg, Configuration conf, int rootColumn) {
    + String nameStr = getNeededColumnNamesString(conf), idStr = getSargColumnIDsString(conf);
    + String[] knownNames = nameStr.split(",");
    + String[] idStrs = (idStr == null) ? null : idStr.split(",");
    + assert idStrs == null || knownNames.length == idStrs.length;
    + HashMap<String, Integer> nameIdMap = new HashMap<>();
    + for (int i = 0; i < knownNames.length; ++i) {
    + nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
    + }
    + List<PredicateLeaf> leaves = sarg.getLeaves();
    + for (int i = 0; i < leaves.size(); ++i) {
    + PredicateLeaf pl = leaves.get(i);
    + Integer colId = nameIdMap.get(pl.getColumnName());
    + String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
    + SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
    + }
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("SARG translated into " + sarg);
    + }
    + }
    +
        public static boolean[] genIncludedColumns(
            List<OrcProto.Type> types, List<Integer> included, boolean isOriginal) {
          int rootColumn = getRootColumn(isOriginal);
    @@ -1342,6 +1377,20 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
              directory);
        }

    + public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg,
    + WriterVersion writerVersion, List<OrcProto.Type> types,
    + List<StripeStatistics> stripeStats, int stripeCount) {
    + LOG.info("Translated ORC pushdown predicate: " + sarg);
    + assert sarg != null;
    + if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
    + return null; // only do split pruning if HIVE-8732 has been fixed in the writer
    + }
    + // eliminate stripes that don't satisfy the predicate condition
    + List<PredicateLeaf> sargLeaves = sarg.getLeaves();
    + int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
    + return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null);
    + }
    +
        private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
            WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
            int stripeCount, Path filePath) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    index ba304ba..77d2cc6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    @@ -1221,4 +1221,39 @@ class RecordReaderImpl implements RecordReader {
          // if we aren't to the right row yet, advance in the stripe.
          advanceToNextRow(reader, rowNumber, true);
        }
    +
    + private static final String TRANSLATED_SARG_SEPARATOR = "_";
    + public static String encodeTranslatedSargColumn(int rootColumn, Integer indexInSourceTable) {
    + return rootColumn + TRANSLATED_SARG_SEPARATOR
    + + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
    + }
    +
    + public static int[] mapTranslatedSargColumns(
    + List<OrcProto.Type> types, List<PredicateLeaf> sargLeaves) {
    + int[] result = new int[sargLeaves.size()];
    + OrcProto.Type lastRoot = null; // Root will be the same for everyone as of now.
    + String lastRootStr = null;
    + for (int i = 0; i < result.length; ++i) {
    + String[] rootAndIndex = sargLeaves.get(i).getColumnName().split(TRANSLATED_SARG_SEPARATOR);
    + assert rootAndIndex.length == 2;
    + String rootStr = rootAndIndex[0], indexStr = rootAndIndex[1];
    + int index = Integer.parseInt(indexStr);
    + // First, check if the column even maps to anything.
    + if (index == -1) {
    + result[i] = -1;
    + continue;
    + }
    + assert index >= 0;
    + // Then, find the root type if needed.
    + if (!rootStr.equals(lastRootStr)) {
    + lastRoot = types.get(Integer.parseInt(rootStr));
    + lastRootStr = rootStr;
    + }
    + // Subtypes of the root types correspond, in order, to the columns in the table schema
    + // (disregarding schema evolution that doesn't presently work). Get the index for the
    + // corresponding subtype.
    + result[i] = lastRoot.getSubtypes(index);
    + }
    + return result;
    + }
      }
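
    The two helpers added above form a small encode/decode protocol: the predicate leaf's column name becomes "<rootColumn>_<tableIndex>", and the reader later maps that table index to a file column through the root type's subtypes. Below is a minimal standalone sketch of that round trip (plain Java, not Hive code; the root type is replaced here by a bare list of subtype ids, an assumption made purely for illustration):

      // Standalone sketch (not Hive code) of the "rootColumn_tableIndex" round trip
      // that the translated SARG relies on. The List<Integer> stands in for the
      // ordered subtype ids that OrcProto.Type would provide for the root column.
      import java.util.Arrays;
      import java.util.List;

      public class TranslatedSargSketch {
        private static final String SEP = "_";

        // Mirrors encodeTranslatedSargColumn: -1 marks a leaf whose column is unknown.
        static String encode(int rootColumn, Integer indexInSourceTable) {
          return rootColumn + SEP + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
        }

        // Simplified decode: look up the file column id via the root's subtypes,
        // in table-schema order, the same way mapTranslatedSargColumns does.
        static int decode(String encoded, List<Integer> rootSubtypes) {
          String[] parts = encoded.split(SEP);
          int index = Integer.parseInt(parts[1]);
          return (index == -1) ? -1 : rootSubtypes.get(index);
        }

        public static void main(String[] args) {
          List<Integer> subtypes = Arrays.asList(1, 2, 3); // file column ids of the root's children
          String col = encode(0, 2);                       // third table column under root 0
          System.out.println(col + " -> file column " + decode(col, subtypes)); // 0_2 -> 3
        }
      }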
  • Jdere at Sep 30, 2015 at 6:41 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 8a27ae3df -> f01e2ad35


    HIVE-11920: ADD JAR failing with URL schemes other than file/ivy/hdfs (Jason Dere, reviewed by Hari Subramaniyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f01e2ad3
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f01e2ad3
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f01e2ad3

    Branch: refs/heads/branch-1
    Commit: f01e2ad359ef74e498b304b2ba54145550038999
    Parents: 8a27ae3
    Author: Jason Dere <jdere@hortonworks.com>
    Authored: Wed Sep 30 11:21:31 2015 -0700
    Committer: Jason Dere <jdere@hortonworks.com>
    Committed: Wed Sep 30 11:25:10 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/ql/session/SessionState.java | 9 ++-------
      ql/src/test/queries/clientpositive/add_jar_pfile.q | 8 ++++++++
      ql/src/test/results/clientpositive/add_jar_pfile.q.out | 12 ++++++++++++
      3 files changed, 22 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/f01e2ad3/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    index 383ba6a..34e5f40 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    @@ -1219,11 +1219,8 @@ public class SessionState {
          String scheme = uri.getScheme() == null ? null : uri.getScheme().toLowerCase();
          if (scheme == null || scheme.equals("file")) {
            return "file";
    - } else if (scheme.equals("hdfs") || scheme.equals("ivy")) {
    - return scheme;
    - } else {
    - throw new RuntimeException("invalid url: " + uri + ", expecting ( file | hdfs | ivy) as url scheme. ");
          }
    + return scheme;
        }

        List<URI> resolveAndDownload(ResourceType t, String value, boolean convertToUnix) throws URISyntaxException,
    @@ -1233,10 +1230,8 @@ public class SessionState {
            return Arrays.asList(uri);
          } else if (getURLType(value).equals("ivy")) {
            return dependencyResolver.downloadDependencies(uri);
    - } else if (getURLType(value).equals("hdfs")) {
    - return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
          } else {
    - throw new RuntimeException("Invalid url " + uri);
    + return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
          }
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/f01e2ad3/ql/src/test/queries/clientpositive/add_jar_pfile.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/add_jar_pfile.q b/ql/src/test/queries/clientpositive/add_jar_pfile.q
    new file mode 100644
    index 0000000..ed55518
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/add_jar_pfile.q
    @@ -0,0 +1,8 @@
    +
    +dfs -copyFromLocal ${system:maven.local.repository}/org/apache/hive/hive-contrib/${system:hive.version}/hive-contrib-${system:hive.version}.jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
    +
    +add jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
    +
    +CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd';
    +
    +DROP TEMPORARY FUNCTION example_add;

    http://git-wip-us.apache.org/repos/asf/hive/blob/f01e2ad3/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/add_jar_pfile.q.out b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    new file mode 100644
    index 0000000..60c65cc
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    @@ -0,0 +1,12 @@
    +PREHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
    +PREHOOK: type: CREATEFUNCTION
    +PREHOOK: Output: example_add
    +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
    +POSTHOOK: type: CREATEFUNCTION
    +POSTHOOK: Output: example_add
    +PREHOOK: query: DROP TEMPORARY FUNCTION example_add
    +PREHOOK: type: DROPFUNCTION
    +PREHOOK: Output: example_add
    +POSTHOOK: query: DROP TEMPORARY FUNCTION example_add
    +POSTHOOK: type: DROPFUNCTION
    +POSTHOOK: Output: example_add
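
    The SessionState change above collapses the scheme whitelist: "file" URIs are used as-is, "ivy" still goes through the dependency resolver, and every other scheme is handed to downloadResource instead of being rejected, which is what allows the pfile:// test above to pass. A minimal standalone sketch of that dispatch, with resolveIvy and downloadResource as hypothetical stubs standing in for the real SessionState/DependencyResolver logic:

      // Standalone sketch of the dispatch HIVE-11920 moves to: "file" URIs are used
      // as-is, "ivy" goes through dependency resolution, and any other scheme
      // (hdfs, pfile, s3a, ...) is downloaded locally instead of being rejected.
      import java.net.URI;
      import java.util.Arrays;
      import java.util.List;

      public class ResourceSchemeSketch {
        static String getUrlType(URI uri) {
          String scheme = uri.getScheme() == null ? null : uri.getScheme().toLowerCase();
          return (scheme == null || scheme.equals("file")) ? "file" : scheme;
        }

        static List<URI> resolve(URI uri) {
          switch (getUrlType(uri)) {
            case "file": return Arrays.asList(uri);
            case "ivy":  return resolveIvy(uri);
            default:     return Arrays.asList(downloadResource(uri)); // hdfs, pfile, ...
          }
        }

        // Hypothetical stubs standing in for DependencyResolver / SessionState logic.
        static List<URI> resolveIvy(URI uri) { return Arrays.asList(uri); }
        static URI downloadResource(URI uri) {
          return URI.create("file:///tmp/" + uri.getPath().replaceAll(".*/", ""));
        }

        public static void main(String[] args) {
          System.out.println(resolve(URI.create("pfile:///tmp/hive-contrib.jar")));
        }
      }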
  • Sershe at Sep 30, 2015 at 7:29 pm
    Repository: hive
    Updated Branches:
       refs/heads/llap 7273a4c44 -> f272ccb25


    HIVE-11134 - HS2 should log open session failure (Thejas Nair via Vaibhav Gumashta)

    Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39214581
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39214581
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39214581

    Branch: refs/heads/llap
    Commit: 39214581297d4df20599a36afdcc84dca70caacb
    Parents: 1cf7e25
    Author: Thejas Nair <thejas@hortonworks.com>
    Authored: Sat Jun 27 07:04:40 2015 -0700
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Mon Sep 28 13:44:36 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hive/service/cli/session/SessionManager.java | 6 ++++--
      1 file changed, 4 insertions(+), 2 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/39214581/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
    index 77c5e66..1119fd3 100644
    --- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
    +++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java
    @@ -289,13 +289,14 @@ public class SessionManager extends CompositeService {
          try {
            session.open(sessionConf);
          } catch (Exception e) {
    + LOG.warn("Failed to open session", e);
            try {
              session.close();
            } catch (Throwable t) {
              LOG.warn("Error closing session", t);
            }
            session = null;
    - throw new HiveSQLException("Failed to open new session: " + e, e);
    + throw new HiveSQLException("Failed to open new session: " + e.getMessage(), e);
          }
          if (isOperationLogEnabled) {
            session.setOperationLogSessionDir(operationLogRootDir);
    @@ -303,13 +304,14 @@ public class SessionManager extends CompositeService {
          try {
            executeSessionHooks(session);
          } catch (Exception e) {
    + LOG.warn("Failed to execute session hooks", e);
            try {
              session.close();
            } catch (Throwable t) {
              LOG.warn("Error closing session", t);
            }
            session = null;
    - throw new HiveSQLException("Failed to execute session hooks", e);
    + throw new HiveSQLException("Failed to execute session hooks: " + e.getMessage(), e);
          }
          handleToSession.put(session.getSessionHandle(), session);
          return session.getSessionHandle();
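
    The pattern in both hunks is the same: log the original exception with its full stack trace before cleanup, attempt a best-effort close, then rethrow a wrapper that carries only e.getMessage() for the client. A tiny standalone sketch of that pattern (not Hive code; java.util.logging stands in for commons-logging):

      // Minimal sketch of the failure-handling pattern this patch applies in
      // SessionManager: warn with the root cause, clean up, then rethrow a wrapper.
      import java.util.logging.Logger;

      public class OpenSessionFailureSketch {
        private static final Logger LOG = Logger.getLogger("SessionManager");

        static void openSession(Runnable open, Runnable close) throws Exception {
          try {
            open.run();
          } catch (Exception e) {
            LOG.warning("Failed to open session: " + e); // log first, with full context
            try {
              close.run();                               // best-effort cleanup
            } catch (Throwable t) {
              LOG.warning("Error closing session: " + t);
            }
            throw new Exception("Failed to open new session: " + e.getMessage(), e);
          }
        }

        public static void main(String[] args) throws Exception {
          openSession(() -> System.out.println("opened"), () -> System.out.println("closed"));
        }
      }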
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11468: (addendum) Vectorize Struct IN() clauses (Matt McCline, via Gopal V)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b801d12c
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b801d12c
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b801d12c

    Branch: refs/heads/llap
    Commit: b801d12cb43c48d45731aaafccc06f14484fc6ab
    Parents: a5ffa71
    Author: Gopal V <gopalv@apache.org>
    Authored: Tue Sep 29 14:57:54 2015 -0700
    Committer: Gopal V <gopalv@apache.org>
    Committed: Tue Sep 29 14:57:54 2015 -0700

    ----------------------------------------------------------------------
      .../ql/optimizer/physical/Vectorizer.java.rej | 86 --------------------
      1 file changed, 86 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b801d12c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
    deleted file mode 100644
    index 5a10b58..0000000
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.rej
    +++ /dev/null
    @@ -1,86 +0,0 @@
    -***************
    -*** 1255,1272 ****
    - LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
    - return false;
    - }
    - if (desc instanceof ExprNodeGenericFuncDesc) {
    - ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
    - boolean r = validateGenericUdf(d);
    - if (!r) {
    - return false;
    - }
    - }
    - if (desc.getChildren() != null) {
    -- for (ExprNodeDesc d: desc.getChildren()) {
    -- // Don't restrict child expressions for projection. Always use looser FILTER mode.
    -- boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
    -- if (!r) {
    - return false;
    - }
    - }
    ---- 1265,1329 ----
    - LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
    - return false;
    - }
    -+ boolean isInExpression = false;
    - if (desc instanceof ExprNodeGenericFuncDesc) {
    - ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
    - boolean r = validateGenericUdf(d);
    - if (!r) {
    - return false;
    - }
    -+ GenericUDF genericUDF = d.getGenericUDF();
    -+ isInExpression = (genericUDF instanceof GenericUDFIn);
    - }
    - if (desc.getChildren() != null) {
    -+ if (isInExpression &&
    -+ desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
    -+ boolean r = validateStructInExpression(desc, VectorExpressionDescriptor.Mode.FILTER);
    -+ } else {
    -+ for (ExprNodeDesc d: desc.getChildren()) {
    -+ // Don't restrict child expressions for projection. Always use looser FILTER mode.
    -+ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
    -+ if (!r) {
    -+ return false;
    -+ }
    -+ }
    -+ }
    -+ }
    -+ return true;
    -+ }
    -+
    -+ private boolean validateStructInExpression(ExprNodeDesc desc,
    -+ VectorExpressionDescriptor.Mode mode) {
    -+
    -+ for (ExprNodeDesc d: desc.getChildren()) {
    -+ TypeInfo typeInfo = d.getTypeInfo();
    -+ if (typeInfo.getCategory() != Category.STRUCT){
    -+ return false;
    -+ }
    -+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    -+
    -+ ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    -+ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
    -+ final int fieldCount = fieldTypeInfos.size();
    -+ for (int f = 0; f < fieldCount; f++) {
    -+ TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
    -+ Category category = fieldTypeInfo.getCategory();
    -+ if (category != Category.PRIMITIVE){
    -+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
    -+ " of type " + fieldTypeInfo.getTypeName());
    -+ return false;
    -+ }
    -+ PrimitiveTypeInfo fieldPrimitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
    -+ InConstantType inConstantType =
    -+ VectorizationContext.getInConstantTypeFromPrimitiveCategory(
    -+ fieldPrimitiveTypeInfo.getPrimitiveCategory());
    -+
    -+ // For now, limit the data types we support for Vectorized Struct IN().
    -+ if (inConstantType != InConstantType.INT_FAMILY &&
    -+ inConstantType != InConstantType.FLOAT_FAMILY &&
    -+ inConstantType != InConstantType.STRING_FAMILY) {
    -+ LOG.info("Cannot vectorize struct field " + fieldNames.get(f) +
    -+ " of type " + fieldTypeInfo.getTypeName());
    - return false;
    - }
    - }
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11989: vector_groupby_reduce.q is failing on CLI and MiniTez drivers on master (Pengcheng Xiong, reviewed by Matt McCline)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9b4d7e4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9b4d7e4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9b4d7e4

    Branch: refs/heads/llap
    Commit: e9b4d7e4e89cd68eeda58b2e2e6014ff24d0a690
    Parents: 1f08789
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Wed Sep 30 10:06:36 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Wed Sep 30 10:06:36 2015 -0700

    ----------------------------------------------------------------------
      .../tez/vector_groupby_reduce.q.out | 70 +++++++++++++-------
      .../clientpositive/vector_groupby_reduce.q.out | 69 +++++++++++++------
      2 files changed, 94 insertions(+), 45 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/e9b4d7e4/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
    index fe7e829..1635462 100644
    --- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
    +++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
    @@ -399,7 +399,7 @@ STAGE PLANS:
                            sort order: +
                            Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
                  Execution mode: vectorized
    - Reducer 3
    + Reducer 3
                  Reduce Operator Tree:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int)
    @@ -562,31 +562,32 @@ STAGE PLANS:
            Edges:
              Reducer 2 <- Map 1 (SIMPLE_EDGE)
              Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
    + Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
      #### A masked pattern was here ####
            Vertices:
    - Map 1
    + Map 1
                  Map Operator Tree:
                      TableScan
                        alias: store_sales
                        Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
    - expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
    - outputColumnNames: _col0, _col1, _col2
    + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
    + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
                          Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                          Group By Operator
    - aggregations: min(_col2)
    - keys: _col0 (type: int), _col1 (type: int)
    + aggregations: min(ss_quantity)
    + keys: ss_item_sk (type: int), ss_ticket_number (type: int)
                            mode: hash
                            outputColumnNames: _col0, _col1, _col2
                            Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                            Reduce Output Operator
                              key expressions: _col0 (type: int), _col1 (type: int)
                              sort order: ++
    - Map-reduce partition columns: _col0 (type: int)
    + Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                              Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                              value expressions: _col2 (type: int)
                  Execution mode: vectorized
    - Reducer 2
    + Reducer 2
                  Reduce Operator Tree:
                    Group By Operator
                      aggregations: min(VALUE._col0)
    @@ -595,18 +596,33 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
    - aggregations: sum(_col1), sum(_col2)
    - keys: _col0 (type: int)
    - mode: complete
    + aggregations: sum(_col0), sum(_col2)
    + keys: _col1 (type: int)
    + mode: hash
                        outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
    - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                          value expressions: _col1 (type: bigint), _col2 (type: bigint)
                  Execution mode: vectorized
    - Reducer 3
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0), sum(VALUE._col1)
    + keys: KEY._col0 (type: int)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: bigint), _col2 (type: bigint)
    + Execution mode: vectorized
    + Reducer 4
                  Reduce Operator Tree:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
    @@ -771,18 +787,18 @@ STAGE PLANS:
              Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
      #### A masked pattern was here ####
            Vertices:
    - Map 1
    + Map 1
                  Map Operator Tree:
                      TableScan
                        alias: store_sales
                        Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
    - expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
    - outputColumnNames: _col0, _col1, _col2
    + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
    + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
                          Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                          Group By Operator
    - aggregations: min(_col2)
    - keys: _col0 (type: int), _col1 (type: int)
    + aggregations: min(ss_quantity)
    + keys: ss_item_sk (type: int), ss_ticket_number (type: int)
                            mode: hash
                            outputColumnNames: _col0, _col1, _col2
                            Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
    @@ -793,7 +809,7 @@ STAGE PLANS:
                              Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                              value expressions: _col2 (type: int)
                  Execution mode: vectorized
    - Reducer 2
    + Reducer 2
                  Reduce Operator Tree:
                    Group By Operator
                      aggregations: min(VALUE._col0)
    @@ -807,13 +823,17 @@ STAGE PLANS:
                        mode: complete
                        outputColumnNames: _col0, _col1, _col2
                        Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    - Reduce Output Operator
    - key expressions: _col0 (type: int), _col1 (type: int)
    - sort order: ++
    + Select Operator
    + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2
                          Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    - value expressions: _col2 (type: bigint)
    + Reduce Output Operator
    + key expressions: _col0 (type: int), _col1 (type: int)
    + sort order: ++
    + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col2 (type: bigint)
                  Execution mode: vectorized
    - Reducer 3
    + Reducer 3
                  Reduce Operator Tree:
                    Select Operator
                      expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)

    http://git-wip-us.apache.org/repos/asf/hive/blob/e9b4d7e4/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
    index fc1997c..3e7077e 100644
    --- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
    +++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
    @@ -550,7 +550,8 @@ POSTHOOK: type: QUERY
      STAGE DEPENDENCIES:
        Stage-1 is a root stage
        Stage-2 depends on stages: Stage-1
    - Stage-0 depends on stages: Stage-2
    + Stage-3 depends on stages: Stage-2
    + Stage-0 depends on stages: Stage-3

      STAGE PLANS:
        Stage: Stage-1
    @@ -560,19 +561,19 @@ STAGE PLANS:
                  alias: store_sales
                  Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
    - expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
    - outputColumnNames: _col0, _col1, _col2
    + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
    + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
                    Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
    - aggregations: min(_col2)
    - keys: _col0 (type: int), _col1 (type: int)
    + aggregations: min(ss_quantity)
    + keys: ss_item_sk (type: int), ss_ticket_number (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
    - Map-reduce partition columns: _col0 (type: int)
    + Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                        Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col2 (type: int)
            Execution mode: vectorized
    @@ -584,11 +585,11 @@ STAGE PLANS:
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
    - aggregations: sum(_col1), sum(_col2)
    - keys: _col0 (type: int)
    - mode: complete
    + aggregations: sum(_col0), sum(_col2)
    + keys: _col1 (type: int)
    + mode: hash
                  outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    table:
    @@ -603,6 +604,30 @@ STAGE PLANS:
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: bigint), _col2 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0), sum(VALUE._col1)
    + keys: KEY._col0 (type: int)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    +
    + Stage: Stage-3
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
                    Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
            Reduce Operator Tree:
    @@ -769,12 +794,12 @@ STAGE PLANS:
                  alias: store_sales
                  Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
    - expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
    - outputColumnNames: _col0, _col1, _col2
    + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
    + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
                    Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
    - aggregations: min(_col2)
    - keys: _col0 (type: int), _col1 (type: int)
    + aggregations: min(ss_quantity)
    + keys: ss_item_sk (type: int), ss_ticket_number (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
    @@ -798,12 +823,16 @@ STAGE PLANS:
                  mode: complete
                  outputColumnNames: _col0, _col1, _col2
                  Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    - File Output Operator
    - compressed: false
    - table:
    - input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
    + Select Operator
    + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.mapred.SequenceFileInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

        Stage: Stage-2
          Map Reduce
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11984: Add HS2 open operation metrics (Jimmy, reviewed by Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1cb30733
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1cb30733
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1cb30733

    Branch: refs/heads/llap
    Commit: 1cb307336c32104f00351a7aa1282c3fdc7f12d0
    Parents: cdaf356
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Mon Sep 28 15:24:44 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Wed Sep 30 07:49:02 2015 -0700

    ----------------------------------------------------------------------
      .../hive/common/metrics/common/MetricsConstant.java | 1 +
      .../apache/hive/service/cli/operation/Operation.java | 13 ++++++++++++-
      .../hive/service/cli/operation/OperationManager.java | 11 +++++++++++
      3 files changed, 24 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    index d1ebe12..13c3cf9 100644
    --- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
    @@ -27,6 +27,7 @@ public class MetricsConstant {
        public static String JVM_EXTRA_SLEEP = "jvm.pause.extraSleepTime";

        public static String OPEN_CONNECTIONS = "open_connections";
    + public static String OPEN_OPERATIONS = "open_operations";

        public static String JDO_ACTIVE_TRANSACTIONS = "active_jdo_transactions";
        public static String JDO_ROLLBACK_TRANSACTIONS = "rollbacked_jdo_transactions";

    http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    index 19153b6..0ab38c9 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    @@ -25,6 +25,9 @@ import java.util.concurrent.TimeUnit;

      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.hive.common.metrics.common.Metrics;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
      import org.apache.hadoop.hive.ql.session.OperationLog;
    @@ -251,9 +254,17 @@ public abstract class Operation {
         */
        protected abstract void runInternal() throws HiveSQLException;

    - public void run() throws HiveSQLException {
    + public final void run() throws HiveSQLException {
          beforeRun();
          try {
    + Metrics metrics = MetricsFactory.getInstance();
    + if (metrics != null) {
    + try {
    + metrics.incrementCounter(MetricsConstant.OPEN_OPERATIONS);
    + } catch (Exception e) {
    + LOG.warn("Error Reporting open operation to Metrics system", e);
    + }
    + }
            runInternal();
          } finally {
            afterRun();

    http://git-wip-us.apache.org/repos/asf/hive/blob/1cb30733/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    index 304a525..e29b4b6 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/OperationManager.java
    @@ -26,6 +26,9 @@ import java.util.Map;

      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.hive.common.metrics.common.Metrics;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
    +import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
    @@ -209,6 +212,14 @@ public class OperationManager extends AbstractService {
          if (operation == null) {
            throw new HiveSQLException("Operation does not exist!");
          }
    + Metrics metrics = MetricsFactory.getInstance();
    + if (metrics != null) {
    + try {
    + metrics.decrementCounter(MetricsConstant.OPEN_OPERATIONS);
    + } catch (Exception e) {
    + LOG.warn("Error Reporting close operation to Metrics system", e);
    + }
    + }
          operation.close();
        }
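
    Taken together, the two hunks keep an open_operations gauge: Operation.run() increments it and OperationManager.closeOperation() decrements it, and in both places a metrics failure is logged rather than propagated so it can never fail the operation itself. A standalone sketch of that pattern, with SimpleMetrics as a hypothetical stand-in for Hive's Metrics/MetricsFactory:

      // Standalone sketch of the open_operations gauge: bump on run, drop on close,
      // and never let a metrics problem break the operation itself.
      import java.util.concurrent.atomic.AtomicLong;

      public class OpenOperationsSketch {
        static class SimpleMetrics {
          final AtomicLong openOperations = new AtomicLong();
          void incrementCounter(String name) { openOperations.incrementAndGet(); }
          void decrementCounter(String name) { openOperations.decrementAndGet(); }
        }

        private final SimpleMetrics metrics = new SimpleMetrics();

        void run() {
          try {
            metrics.incrementCounter("open_operations");
          } catch (Exception e) {
            // Mirrors the patch: metrics failures are logged, not rethrown.
            System.err.println("Error reporting open operation to metrics system: " + e);
          }
          // ... runInternal() would go here ...
        }

        void close() {
          try {
            metrics.decrementCounter("open_operations");
          } catch (Exception e) {
            System.err.println("Error reporting close operation to metrics system: " + e);
          }
        }

        public static void main(String[] args) {
          OpenOperationsSketch op = new OpenOperationsSketch();
          op.run();
          op.close();
          System.out.println("open_operations = " + op.metrics.openOperations.get()); // 0
        }
      }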
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11724 : WebHcat get jobs to order jobs on time order with latest at top (Kiran Kumar Kolli, reviewed by Hari Subramaniyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a5ffa719
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a5ffa719
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a5ffa719

    Branch: refs/heads/llap
    Commit: a5ffa71908f5f15c5acaef476ad557d98583f2fa
    Parents: a4c43f0
    Author: Hari Subramaniyan <harisankar@apache.org>
    Authored: Tue Sep 29 11:56:51 2015 -0700
    Committer: Hari Subramaniyan <harisankar@apache.org>
    Committed: Tue Sep 29 11:56:51 2015 -0700

    ----------------------------------------------------------------------
      .../hive/hcatalog/templeton/AppConfig.java | 21 ++++++++++++++++++++
      .../apache/hive/hcatalog/templeton/Server.java | 12 +++++++++--
      2 files changed, 31 insertions(+), 2 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/a5ffa719/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
    index 062d5a0..403ff14 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
    @@ -89,6 +89,11 @@ public class AppConfig extends Configuration {
          "webhcat-site.xml"
        };

    + public enum JobsListOrder {
    + lexicographicalasc,
    + lexicographicaldesc,
    + }
    +
        public static final String PORT = "templeton.port";
        public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding";
        public static final String EXEC_ENVS_NAME = "templeton.exec.envs";
    @@ -105,6 +110,7 @@ public class AppConfig extends Configuration {
        public static final String HIVE_PATH_NAME = "templeton.hive.path";
        public static final String MAPPER_MEMORY_MB = "templeton.mapper.memory.mb";
        public static final String MR_AM_MEMORY_MB = "templeton.mr.am.memory.mb";
    + public static final String TEMPLETON_JOBSLIST_ORDER = "templeton.jobs.listorder";

        /**
         * see webhcat-default.xml
    @@ -281,6 +287,21 @@ public class AppConfig extends Configuration {
          }
        }

    + public JobsListOrder getListJobsOrder() {
    + String requestedOrder = get(TEMPLETON_JOBSLIST_ORDER);
    + if (requestedOrder != null) {
    + try {
    + return JobsListOrder.valueOf(requestedOrder.toLowerCase());
    + }
    + catch(IllegalArgumentException ex) {
    + LOG.warn("Ignoring setting " + TEMPLETON_JOBSLIST_ORDER + " configured with in-correct value " + requestedOrder);
    + }
    + }
    +
    + // Default to lexicographicalasc
    + return JobsListOrder.lexicographicalasc;
    + }
    +
        public void startCleanup() {
          JobState.getStorageInstance(this).startCleanup(this);
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/a5ffa719/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
    index 27b8e38..bba16c5 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
    @@ -1002,8 +1002,16 @@ public class Server {
            throw new BadParam("Invalid numrecords format: numrecords should be an integer > 0");
          }

    - // Sort the list lexicographically
    - Collections.sort(list);
    + // Sort the list as requested
    + switch (appConf.getListJobsOrder()) {
    + case lexicographicaldesc:
    + Collections.sort(list, Collections.reverseOrder());
    + break;
    + case lexicographicalasc:
    + default:
    + Collections.sort(list);
    + break;
    + }

          for (String job : list) {
            // If numRecords = -1, fetch all records.
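
    The behavior added here is driven entirely by templeton.jobs.listorder: the value is parsed case-insensitively into the JobsListOrder enum, an unrecognized value is logged and falls back to lexicographicalasc, and the job id list is then sorted ascending or descending accordingly. A minimal standalone sketch of that flow (plain Java, outside of WebHCat's AppConfig/Server classes):

      // Standalone sketch of templeton.jobs.listorder handling: parse the configured
      // value into an enum, fall back to ascending on bad input, then sort.
      import java.util.ArrayList;
      import java.util.Arrays;
      import java.util.Collections;
      import java.util.List;

      public class JobsListOrderSketch {
        enum JobsListOrder { lexicographicalasc, lexicographicaldesc }

        static JobsListOrder parseOrder(String requested) {
          if (requested != null) {
            try {
              return JobsListOrder.valueOf(requested.toLowerCase());
            } catch (IllegalArgumentException ex) {
              System.err.println("Ignoring incorrect list order: " + requested);
            }
          }
          return JobsListOrder.lexicographicalasc; // default
        }

        static void sortJobs(List<String> jobs, JobsListOrder order) {
          if (order == JobsListOrder.lexicographicaldesc) {
            Collections.sort(jobs, Collections.reverseOrder());
          } else {
            Collections.sort(jobs);
          }
        }

        public static void main(String[] args) {
          List<String> jobs = new ArrayList<>(Arrays.asList("job_3", "job_1", "job_2"));
          sortJobs(jobs, parseOrder("LexicographicalDesc"));
          System.out.println(jobs); // [job_3, job_2, job_1]
        }
      }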
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11952 : disable q tests that are both slow and less relevant (Sergey Shelukhin, reviewed by Sergio Peña)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e1ce9a23
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e1ce9a23
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e1ce9a23

    Branch: refs/heads/llap
    Commit: e1ce9a23a7045618da6850e3315f785ea8c62d4d
    Parents: b801d12
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Tue Sep 29 15:29:35 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Tue Sep 29 15:29:35 2015 -0700

    ----------------------------------------------------------------------
      itests/qtest/pom.xml | 2 +-
      .../test/resources/testconfiguration.properties | 27 +++++++++++++++-----
      2 files changed, 21 insertions(+), 8 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/e1ce9a23/itests/qtest/pom.xml
    ----------------------------------------------------------------------
    diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
    index 739d06a..74ca88f 100644
    --- a/itests/qtest/pom.xml
    +++ b/itests/qtest/pom.xml
    @@ -436,7 +436,7 @@
                        templatePath="${basedir}/${hive.path.to.root}/ql/src/test/templates/" template="TestCliDriver.vm"
                        queryDirectory="${basedir}/${hive.path.to.root}/ql/src/test/queries/clientpositive/"
                        queryFile="${qfile}"
    - excludeQueryFile="${minimr.query.files},${minitez.query.files},${encrypted.query.files},${spark.only.query.files}"
    + excludeQueryFile="${minimr.query.files},${minitez.query.files},${encrypted.query.files},${spark.only.query.files},${disabled.query.files}"
                        queryFileRegex="${qfile_regex}"
                        clusterMode="${clustermode}"
                        runDisabled="${run_disabled}"

    http://git-wip-us.apache.org/repos/asf/hive/blob/e1ce9a23/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
    index 4f7b25f..700ea63 100644
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@ -34,9 +34,6 @@ minimr.query.files=auto_sortmerge_join_16.q,\
        non_native_window_udf.q, \
        optrstat_groupby.q,\
        parallel_orderby.q,\
    - ql_rewrite_gbtoidx.q,\
    - ql_rewrite_gbtoidx_cbo_1.q,\
    - ql_rewrite_gbtoidx_cbo_2.q,\
        quotedid_smb.q,\
        reduce_deduplicate.q,\
        remote_script.q,\
    @@ -45,7 +42,6 @@ minimr.query.files=auto_sortmerge_join_16.q,\
        schemeAuthority2.q,\
        scriptfile1.q,\
        scriptfile1_win.q,\
    - smb_mapjoin_8.q,\
        stats_counter.q,\
        stats_counter_partitioned.q,\
        table_nonprintable.q,\
    @@ -54,6 +50,20 @@ minimr.query.files=auto_sortmerge_join_16.q,\
        uber_reduce.q,\
        udf_using.q

    +# These tests are disabled for minimr
    +# ql_rewrite_gbtoidx.q,\
    +# ql_rewrite_gbtoidx_cbo_1.q,\
    +# ql_rewrite_gbtoidx_cbo_2.q,\
    +# smb_mapjoin_8.q,\
    +
    +
    +# Tests that are not enabled for CLI Driver
    +disabled.query.files=ql_rewrite_gbtoidx.q,\
    + ql_rewrite_gbtoidx_cbo_1.q,\
    + ql_rewrite_gbtoidx_cbo_2.q,\
    + rcfile_merge1.q,\
    + smb_mapjoin_8.q
    +
      minitez.query.files.shared=alter_merge_2_orc.q,\
        alter_merge_orc.q,\
        alter_merge_stats_orc.q,\
    @@ -1171,8 +1181,6 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
        load_hdfs_file_with_space_in_the_name.q,\
        optrstat_groupby.q,\
        parallel_orderby.q,\
    - ql_rewrite_gbtoidx.q,\
    - ql_rewrite_gbtoidx_cbo_1.q,\
        quotedid_smb.q,\
        reduce_deduplicate.q,\
        remote_script.q,\
    @@ -1181,13 +1189,18 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
        schemeAuthority2.q,\
        scriptfile1.q,\
        scriptfile1_win.q,\
    - smb_mapjoin_8.q,\
        stats_counter.q,\
        stats_counter_partitioned.q,\
        temp_table_external.q,\
        truncate_column_buckets.q,\
        uber_reduce.q

    +# These tests are removed from miniSparkOnYarn.query.files
    +# ql_rewrite_gbtoidx.q,\
    +# ql_rewrite_gbtoidx_cbo_1.q,\
    +# smb_mapjoin_8.q,\
    +
    +
      spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
        groupby2_multi_distinct.q,\
        groupby3_map_skew_multi_distinct.q,\
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11984: Add HS2 open operation metrics (addendum) (Jimmy, reviewed by Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/265e42c5
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/265e42c5
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/265e42c5

    Branch: refs/heads/llap
    Commit: 265e42c5f45f7c160861132bbfa6ab78a85830e9
    Parents: e9b4d7e
    Author: Jimmy Xiang <jxiang@cloudera.com>
    Authored: Wed Sep 30 10:39:08 2015 -0700
    Committer: Jimmy Xiang <jxiang@cloudera.com>
    Committed: Wed Sep 30 10:40:46 2015 -0700

    ----------------------------------------------------------------------
      .../src/java/org/apache/hive/service/cli/operation/Operation.java | 2 +-
      1 file changed, 1 insertion(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/265e42c5/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    index 0ab38c9..515299c 100644
    --- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    +++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
    @@ -254,7 +254,7 @@ public abstract class Operation {
         */
        protected abstract void runInternal() throws HiveSQLException;

    - public final void run() throws HiveSQLException {
    + public void run() throws HiveSQLException {
          beforeRun();
          try {
            Metrics metrics = MetricsFactory.getInstance();
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11964: RelOptHiveTable.hiveColStatsMap might contain mismatched column stats (Chaoyu Tang, reviewed by Laljo John Pullokkaran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1f087893
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1f087893
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1f087893

    Branch: refs/heads/llap
    Commit: 1f08789395db979c1ca298fbaf7a46abfb7d4e1f
    Parents: 1cb3073
    Author: ctang <ctang.ma@gmail.com>
    Authored: Wed Sep 30 12:15:16 2015 -0400
    Committer: ctang <ctang.ma@gmail.com>
    Committed: Wed Sep 30 12:15:16 2015 -0400

    ----------------------------------------------------------------------
      .../hive/ql/optimizer/calcite/RelOptHiveTable.java | 15 +++++++++++++++
      1 file changed, 15 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/1f087893/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    index 6c0bd25..1bd241b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    @@ -312,6 +312,19 @@ public class RelOptHiveTable extends RelOptAbstractTable {
                setOfFiledCols.removeAll(setOfObtainedColStats);

                colNamesFailedStats.addAll(setOfFiledCols);
    + } else {
    + // Column stats in hiveColStats might not be in the same order as the columns in
    + // nonPartColNamesThatRqrStats. reorder hiveColStats so we can build hiveColStatsMap
    + // using nonPartColIndxsThatRqrStats as below
    + Map<String, ColStatistics> columnStatsMap =
    + new HashMap<String, ColStatistics>(hiveColStats.size());
    + for (ColStatistics cs : hiveColStats) {
    + columnStatsMap.put(cs.getColumnName(), cs);
    + }
    + hiveColStats.clear();
    + for (String colName : nonPartColNamesThatRqrStats) {
    + hiveColStats.add(columnStatsMap.get(colName));
    + }
              }
            } else {
              // 2.2 Obtain col stats for partitioned table.
    @@ -349,6 +362,8 @@ public class RelOptHiveTable extends RelOptAbstractTable {

            if (hiveColStats != null && hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
              for (int i = 0; i < hiveColStats.size(); i++) {
    + // the columns in nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
    + // are in same order
                hiveColStatsMap.put(nonPartColIndxsThatRqrStats.get(i), hiveColStats.get(i));
              }
            }
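
    The core of the fix is the reordering in the else branch: the returned column statistics are keyed by column name and the list is rebuilt in nonPartColNamesThatRqrStats order, so the later positional pairing with nonPartColIndxsThatRqrStats is guaranteed to match. A standalone sketch of that reordering, with ColStat as a hypothetical stand-in for Hive's ColStatistics:

      // Standalone sketch of the fix's core idea: stats may come back in a different
      // order than the requested column names, so rebuild the list keyed by name
      // before pairing it positionally with the column indexes.
      import java.util.ArrayList;
      import java.util.Arrays;
      import java.util.HashMap;
      import java.util.List;
      import java.util.Map;

      public class ReorderColStatsSketch {
        record ColStat(String columnName, long ndv) {}

        static List<ColStat> reorder(List<ColStat> stats, List<String> requestedNames) {
          Map<String, ColStat> byName = new HashMap<>(stats.size());
          for (ColStat cs : stats) {
            byName.put(cs.columnName(), cs);
          }
          List<ColStat> ordered = new ArrayList<>(requestedNames.size());
          for (String name : requestedNames) {
            ordered.add(byName.get(name)); // index i now matches requestedNames.get(i)
          }
          return ordered;
        }

        public static void main(String[] args) {
          List<ColStat> stats = Arrays.asList(new ColStat("b", 10), new ColStat("a", 5));
          System.out.println(reorder(stats, Arrays.asList("a", "b")));
        }
      }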
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11823 : create a self-contained translation for SARG to be used by metastore (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/064e37c4
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/064e37c4
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/064e37c4

    Branch: refs/heads/llap
    Commit: 064e37c460d1c464431f740e480a6f08353d69e6
    Parents: 8c8cc19
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Wed Sep 30 11:35:36 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Wed Sep 30 11:35:47 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 49 ++++++++++++++++++++
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 35 ++++++++++++++
      2 files changed, 84 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    index 52e1b06..c45b6e6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
      import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
    +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
      import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
      import org.apache.hadoop.hive.serde2.SerDeStats;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    @@ -254,6 +255,40 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
          }
        }

    + /**
    + * Modifies the SARG, replacing column names with column indexes in the target table schema. This
    + * basically does the same thing as all the shenanigans with included columns, except for the
    + * last step where ORC gets direct subtypes of the root column and uses the ordered match to map
    + * table columns to file columns. The numbers put into the predicate leaf should allow going into
    + * said subtypes directly by index to get the proper index in the file.
    + * This won't work with schema evolution, although it would probably be much easier to reason about
    + * if schema evolution were to be supported, because this is a clear boundary between table
    + * schema columns and all things ORC. None of the ORC stuff is used here and none of the
    + * table schema stuff is used after that - ORC doesn't need a bunch of extra crap to apply
    + * the SARG thus modified.
    + */
    + public static void translateSargToTableColIndexes(
    + SearchArgument sarg, Configuration conf, int rootColumn) {
    + String nameStr = getNeededColumnNamesString(conf), idStr = getSargColumnIDsString(conf);
    + String[] knownNames = nameStr.split(",");
    + String[] idStrs = (idStr == null) ? null : idStr.split(",");
    + assert idStrs == null || knownNames.length == idStrs.length;
    + HashMap<String, Integer> nameIdMap = new HashMap<>();
    + for (int i = 0; i < knownNames.length; ++i) {
    + nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
    + }
    + List<PredicateLeaf> leaves = sarg.getLeaves();
    + for (int i = 0; i < leaves.size(); ++i) {
    + PredicateLeaf pl = leaves.get(i);
    + Integer colId = nameIdMap.get(pl.getColumnName());
    + String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
    + SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
    + }
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("SARG translated into " + sarg);
    + }
    + }
    +
        public static boolean[] genIncludedColumns(
            List<OrcProto.Type> types, List<Integer> included, boolean isOriginal) {
          int rootColumn = getRootColumn(isOriginal);
    @@ -1342,6 +1377,20 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
              directory);
        }

    + public static boolean[] pickStripesViaTranslatedSarg(SearchArgument sarg,
    + WriterVersion writerVersion, List<OrcProto.Type> types,
    + List<StripeStatistics> stripeStats, int stripeCount) {
    + LOG.info("Translated ORC pushdown predicate: " + sarg);
    + assert sarg != null;
    + if (stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
    + return null; // only do split pruning if HIVE-8732 has been fixed in the writer
    + }
    + // eliminate stripes that don't satisfy the predicate condition
    + List<PredicateLeaf> sargLeaves = sarg.getLeaves();
    + int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
    + return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null);
    + }
    +
        private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
            WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
            int stripeCount, Path filePath) {
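
    For illustration, a self-contained sketch (plain Java, not the Hive API) of the name-to-index translation that translateSargToTableColIndexes performs above; plain strings stand in for the PredicateLeaf columns, and the encoding mirrors encodeTranslatedSargColumn:

        import java.util.*;

        public class SargColumnTranslationSketch {
          // Mirrors the patch's encoding: "<rootColumn>_<indexInSourceTable>", with -1 when unmapped.
          static String encode(int rootColumn, Integer indexInSourceTable) {
            return rootColumn + "_" + (indexInSourceTable == null ? -1 : indexInSourceTable);
          }

          // Replace each leaf's column name with its encoded table-column index, the way
          // translateSargToTableColIndexes rewrites the PredicateLeaf columns.
          static List<String> translate(List<String> leafColumns, String[] knownNames,
                                        String[] idStrs, int rootColumn) {
            Map<String, Integer> nameIdMap = new HashMap<>();
            for (int i = 0; i < knownNames.length; ++i) {
              nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
            }
            List<String> translated = new ArrayList<>(leafColumns.size());
            for (String col : leafColumns) {
              translated.add(encode(rootColumn, nameIdMap.get(col)));
            }
            return translated;
          }

          public static void main(String[] args) {
            // Leaves on "b" and on a column with no mapping; the latter encodes to index -1.
            System.out.println(translate(List.of("b", "missing"),
                new String[]{"a", "b"}, new String[]{"3", "7"}, 0)); // prints [0_7, 0_-1]
          }
        }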

    http://git-wip-us.apache.org/repos/asf/hive/blob/064e37c4/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    index ba304ba..77d2cc6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    @@ -1221,4 +1221,39 @@ class RecordReaderImpl implements RecordReader {
          // if we aren't to the right row yet, advance in the stripe.
          advanceToNextRow(reader, rowNumber, true);
        }
    +
    + private static final String TRANSLATED_SARG_SEPARATOR = "_";
    + public static String encodeTranslatedSargColumn(int rootColumn, Integer indexInSourceTable) {
    + return rootColumn + TRANSLATED_SARG_SEPARATOR
    + + ((indexInSourceTable == null) ? -1 : indexInSourceTable);
    + }
    +
    + public static int[] mapTranslatedSargColumns(
    + List<OrcProto.Type> types, List<PredicateLeaf> sargLeaves) {
    + int[] result = new int[sargLeaves.size()];
    + OrcProto.Type lastRoot = null; // Root will be the same for everyone as of now.
    + String lastRootStr = null;
    + for (int i = 0; i < result.length; ++i) {
    + String[] rootAndIndex = sargLeaves.get(i).getColumnName().split(TRANSLATED_SARG_SEPARATOR);
    + assert rootAndIndex.length == 2;
    + String rootStr = rootAndIndex[0], indexStr = rootAndIndex[1];
    + int index = Integer.parseInt(indexStr);
    + // First, check if the column even maps to anything.
    + if (index == -1) {
    + result[i] = -1;
    + continue;
    + }
    + assert index >= 0;
    + // Then, find the root type if needed.
    + if (!rootStr.equals(lastRootStr)) {
    + lastRoot = types.get(Integer.parseInt(rootStr));
    + lastRootStr = rootStr;
    + }
    + // Subtypes of the root types correspond, in order, to the columns in the table schema
    + // (disregarding schema evolution that doesn't presently work). Get the index for the
    + // corresponding subtype.
    + result[i] = lastRoot.getSubtypes(index);
    + }
    + return result;
    + }
      }
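
    The decoding side can be sketched the same way; here a plain int[][] stands in for the ORC type tree (subtypesOf[t] playing the role of OrcProto.Type.getSubtypes), so this is only an assumption-laden illustration of what mapTranslatedSargColumns does, not the real reader code:

        import java.util.*;

        public class TranslatedSargDecodeSketch {
          // subtypesOf[t] lists the direct subtypes of type t, standing in for getSubtypes().
          static int[] mapColumns(List<String> encodedLeafColumns, int[][] subtypesOf) {
            int[] result = new int[encodedLeafColumns.size()];
            for (int i = 0; i < result.length; ++i) {
              String[] rootAndIndex = encodedLeafColumns.get(i).split("_");
              int root = Integer.parseInt(rootAndIndex[0]);
              int index = Integer.parseInt(rootAndIndex[1]);
              // -1 means the leaf's column does not map to a file column; keep it as -1.
              result[i] = (index == -1) ? -1 : subtypesOf[root][index];
            }
            return result;
          }

          public static void main(String[] args) {
            int[][] subtypesOf = { {1, 2, 3} }; // root type 0 has file columns 1..3
            System.out.println(Arrays.toString(
                mapColumns(List.of("0_2", "0_-1"), subtypesOf))); // prints [3, -1]
          }
        }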
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11920: ADD JAR failing with URL schemes other than file/ivy/hdfs (Jason Dere, reviewed by Hari Subramaniyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8c8cc19f
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8c8cc19f
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8c8cc19f

    Branch: refs/heads/llap
    Commit: 8c8cc19fd4b2ddfc616905a021fd0588878ab121
    Parents: 265e42c
    Author: Jason Dere <jdere@hortonworks.com>
    Authored: Wed Sep 30 11:21:31 2015 -0700
    Committer: Jason Dere <jdere@hortonworks.com>
    Committed: Wed Sep 30 11:21:31 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/ql/session/SessionState.java | 9 ++-------
      ql/src/test/queries/clientpositive/add_jar_pfile.q | 8 ++++++++
      ql/src/test/results/clientpositive/add_jar_pfile.q.out | 12 ++++++++++++
      3 files changed, 22 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    index 0bd347c..dc8c336 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    @@ -1252,11 +1252,8 @@ public class SessionState {
          String scheme = uri.getScheme() == null ? null : uri.getScheme().toLowerCase();
          if (scheme == null || scheme.equals("file")) {
            return "file";
    - } else if (scheme.equals("hdfs") || scheme.equals("ivy")) {
    - return scheme;
    - } else {
    - throw new RuntimeException("invalid url: " + uri + ", expecting ( file | hdfs | ivy) as url scheme. ");
          }
    + return scheme;
        }

        List<URI> resolveAndDownload(ResourceType t, String value, boolean convertToUnix) throws URISyntaxException,
    @@ -1266,10 +1263,8 @@ public class SessionState {
            return Arrays.asList(uri);
          } else if (getURLType(value).equals("ivy")) {
            return dependencyResolver.downloadDependencies(uri);
    - } else if (getURLType(value).equals("hdfs")) {
    - return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
          } else {
    - throw new RuntimeException("Invalid url " + uri);
    + return Arrays.asList(createURI(downloadResource(value, convertToUnix)));
          }
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/test/queries/clientpositive/add_jar_pfile.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/add_jar_pfile.q b/ql/src/test/queries/clientpositive/add_jar_pfile.q
    new file mode 100644
    index 0000000..ed55518
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/add_jar_pfile.q
    @@ -0,0 +1,8 @@
    +
    +dfs -copyFromLocal ${system:maven.local.repository}/org/apache/hive/hive-contrib/${system:hive.version}/hive-contrib-${system:hive.version}.jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
    +
    +add jar pfile://${system:test.tmp.dir}/hive-contrib-${system:hive.version}.jar;
    +
    +CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd';
    +
    +DROP TEMPORARY FUNCTION example_add;

    http://git-wip-us.apache.org/repos/asf/hive/blob/8c8cc19f/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/add_jar_pfile.q.out b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    new file mode 100644
    index 0000000..60c65cc
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/add_jar_pfile.q.out
    @@ -0,0 +1,12 @@
    +PREHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
    +PREHOOK: type: CREATEFUNCTION
    +PREHOOK: Output: example_add
    +POSTHOOK: query: CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd'
    +POSTHOOK: type: CREATEFUNCTION
    +POSTHOOK: Output: example_add
    +PREHOOK: query: DROP TEMPORARY FUNCTION example_add
    +PREHOOK: type: DROPFUNCTION
    +PREHOOK: Output: example_add
    +POSTHOOK: query: DROP TEMPORARY FUNCTION example_add
    +POSTHOOK: type: DROPFUNCTION
    +POSTHOOK: Output: example_add
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-12000 : LLAP: Merge master into branch (Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f272ccb2
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f272ccb2
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f272ccb2

    Branch: refs/heads/llap
    Commit: f272ccb25bc495d600a713058f6c2082ebd6d966
    Parents: 7273a4c 064e37c
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Wed Sep 30 12:22:32 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Wed Sep 30 12:22:32 2015 -0700

    ----------------------------------------------------------------------
      .../common/metrics/common/MetricsConstant.java | 1 +
      .../hive/hcatalog/templeton/AppConfig.java | 21 +++
      .../apache/hive/hcatalog/templeton/Server.java | 12 +-
      itests/qtest/pom.xml | 2 +-
      .../test/resources/testconfiguration.properties | 27 ++-
      .../hadoop/hive/ql/io/orc/MetadataReader.java | 2 +-
      .../hive/ql/io/orc/MetadataReaderImpl.java | 20 ++-
      .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 49 ++++++
      .../hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +-
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 35 ++++
      .../hive/ql/io/orc/RecordReaderUtils.java | 5 +-
      .../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
      .../ql/optimizer/calcite/RelOptHiveTable.java | 15 ++
      .../ql/optimizer/physical/Vectorizer.java.rej | 86 ----------
      .../hadoop/hive/ql/session/SessionState.java | 9 +-
      .../test/queries/clientpositive/add_jar_pfile.q | 8 +
      .../clientpositive/metadata_only_queries.q | 15 ++
      .../results/clientpositive/add_jar_pfile.q.out | 12 ++
      .../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
      .../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
      .../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
      .../tez/vector_groupby_reduce.q.out | 70 +++++---
      .../clientpositive/vector_groupby_reduce.q.out | 69 +++++---
      .../hive/service/cli/operation/Operation.java | 11 ++
      .../service/cli/operation/OperationManager.java | 11 ++
      .../service/cli/session/HiveSessionProxy.java | 6 +
      .../service/cli/session/SessionManager.java | 6 +-
      .../thrift/EmbeddedThriftBinaryCLIService.java | 2 +-
      .../thrift/ThreadPoolExecutorWithOomHook.java | 55 ++++++
      .../cli/thrift/ThriftBinaryCLIService.java | 12 +-
      .../service/cli/thrift/ThriftCLIService.java | 3 +
      .../cli/thrift/ThriftHttpCLIService.java | 10 +-
      .../apache/hive/service/server/HiveServer2.java | 12 +-
      .../hive/service/auth/TestPlainSaslHelper.java | 2 +-
      .../session/TestPluggableHiveSessionImpl.java | 2 +-
      .../cli/session/TestSessionGlobalInitFile.java | 2 +-
      36 files changed, 957 insertions(+), 191 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/itests/qtest/pom.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    index 35cc05f,43d2933..cea324c
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    @@@ -18,17 -18,102 +18,17 @@@
       package org.apache.hadoop.hive.ql.io.orc;

       import java.io.IOException;
      -import java.nio.ByteBuffer;
      -import java.util.List;

      -import org.apache.hadoop.fs.FSDataInputStream;
      -import org.apache.hadoop.fs.FileSystem;
      -import org.apache.hadoop.fs.Path;
      -import org.apache.hadoop.hive.common.DiskRange;
      -import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
      +import org.apache.hadoop.hive.ql.io.orc.OrcProto.BloomFilterIndex;
      +import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndex;
      +import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeFooter;

      -import com.google.common.collect.Lists;
      +public interface MetadataReader {
      + RecordReaderImpl.Index readRowIndex(StripeInformation stripe, StripeFooter footer,
      + boolean[] included, RowIndex[] indexes, boolean[] sargColumns,
      + BloomFilterIndex[] bloomFilterIndices) throws IOException;

      -public class MetadataReader {
      - private final FSDataInputStream file;
      - private final CompressionCodec codec;
      - private final int bufferSize;
      - private final int typeCount;
      + StripeFooter readStripeFooter(StripeInformation stripe) throws IOException;

      - public MetadataReader(FileSystem fileSystem, Path path,
      - CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
      - this(fileSystem.open(path), codec, bufferSize, typeCount);
      - }
      -
      - public MetadataReader(FSDataInputStream file,
      - CompressionCodec codec, int bufferSize, int typeCount) {
      - this.file = file;
      - this.codec = codec;
      - this.bufferSize = bufferSize;
      - this.typeCount = typeCount;
      - }
      -
      - public RecordReaderImpl.Index readRowIndex(StripeInformation stripe, OrcProto.StripeFooter footer,
      - boolean[] included, OrcProto.RowIndex[] indexes, boolean[] sargColumns,
      - OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
      - if (footer == null) {
      - footer = readStripeFooter(stripe);
      - }
      - if (indexes == null) {
      - indexes = new OrcProto.RowIndex[typeCount];
      - }
      - if (bloomFilterIndices == null) {
      - bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
      - }
      - long offset = stripe.getOffset();
      - List<OrcProto.Stream> streams = footer.getStreamsList();
      - for (int i = 0; i < streams.size(); i++) {
      - OrcProto.Stream stream = streams.get(i);
      - OrcProto.Stream nextStream = null;
      - if (i < streams.size() - 1) {
      - nextStream = streams.get(i+1);
      - }
      - int col = stream.getColumn();
      - int len = (int) stream.getLength();
      - // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
      - // filter and combine the io to read row index and bloom filters for that column together
      - if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
      - boolean readBloomFilter = false;
      - if (sargColumns != null && sargColumns[col] &&
      - nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
      - len += nextStream.getLength();
      - i += 1;
      - readBloomFilter = true;
      - }
      - if ((included == null || included[col]) && indexes[col] == null) {
      - byte[] buffer = new byte[len];
      - file.readFully(offset, buffer, 0, buffer.length);
      - ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
      - indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
      - bb, new long[]{0}, stream.getLength(), codec, bufferSize));
      - if (readBloomFilter) {
      - bb[0].position((int) stream.getLength());
      - bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(
      - InStream.create("bloom_filter", bb, new long[]{0}, nextStream.getLength(),
      - codec, bufferSize));
      - }
      - }
      - }
      - offset += len;
      - }
      -
      - RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
      - return index;
      - }
      -
      - public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
      - long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
      - int tailLength = (int) stripe.getFooterLength();
      -
      - // read the footer
      - ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
      - file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
      - return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
      - Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
      - tailLength, codec, bufferSize));
      - }
      -
      - public void close() throws IOException {
      - file.close();
      - }
      + void close() throws IOException;
    - }
    + }

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
    index 83594f7,0000000..1456df3
    mode 100644,000000..100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
    @@@ -1,119 -1,0 +1,123 @@@
      +/**
      + * Licensed to the Apache Software Foundation (ASF) under one
      + * or more contributor license agreements. See the NOTICE file
      + * distributed with this work for additional information
      + * regarding copyright ownership. The ASF licenses this file
      + * to you under the Apache License, Version 2.0 (the
      + * "License"); you may not use this file except in compliance
      + * with the License. You may obtain a copy of the License at
      + *
      + * http://www.apache.org/licenses/LICENSE-2.0
      + *
      + * Unless required by applicable law or agreed to in writing, software
      + * distributed under the License is distributed on an "AS IS" BASIS,
      + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      + * See the License for the specific language governing permissions and
      + * limitations under the License.
      + */
      +package org.apache.hadoop.hive.ql.io.orc;
      +
      +import java.io.IOException;
      +import java.nio.ByteBuffer;
      +import java.util.List;
      +
      +import org.apache.hadoop.fs.FSDataInputStream;
      +import org.apache.hadoop.fs.FileSystem;
      +import org.apache.hadoop.fs.Path;
      +import org.apache.hadoop.hive.common.io.DiskRange;
      +import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
      +
      +import com.google.common.collect.Lists;
      +
      +public class MetadataReaderImpl implements MetadataReader {
      + private final FSDataInputStream file;
      + private final CompressionCodec codec;
      + private final int bufferSize;
      + private final int typeCount;
      +
    - public MetadataReaderImpl(FileSystem fileSystem, Path path, CompressionCodec codec,
    - int bufferSize, int typeCount) throws IOException {
    - this.file = fileSystem.open(path);
    ++ public MetadataReaderImpl(FileSystem fileSystem, Path path,
    ++ CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
    ++ this(fileSystem.open(path), codec, bufferSize, typeCount);
    ++ }
    ++
    ++ public MetadataReaderImpl(FSDataInputStream file,
    ++ CompressionCodec codec, int bufferSize, int typeCount) {
    ++ this.file = file;
      + this.codec = codec;
      + this.bufferSize = bufferSize;
      + this.typeCount = typeCount;
      + }
      +
      + @Override
      + public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
      + OrcProto.StripeFooter footer, boolean[] included, OrcProto.RowIndex[] indexes,
      + boolean[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
      + if (footer == null) {
      + footer = readStripeFooter(stripe);
      + }
      + if (indexes == null) {
      + indexes = new OrcProto.RowIndex[typeCount];
      + }
      + if (bloomFilterIndices == null) {
      + bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
      + }
      + long offset = stripe.getOffset();
      + List<OrcProto.Stream> streams = footer.getStreamsList();
      + for (int i = 0; i < streams.size(); i++) {
      + OrcProto.Stream stream = streams.get(i);
      + OrcProto.Stream nextStream = null;
      + if (i < streams.size() - 1) {
      + nextStream = streams.get(i+1);
      + }
      + int col = stream.getColumn();
      + int len = (int) stream.getLength();
      + // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
      + // filter and combine the io to read row index and bloom filters for that column together
      + if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
      + boolean readBloomFilter = false;
      + if (sargColumns != null && sargColumns[col] &&
      + nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
      + len += nextStream.getLength();
      + i += 1;
      + readBloomFilter = true;
      + }
      + if ((included == null || included[col]) && indexes[col] == null) {
      + byte[] buffer = new byte[len];
    ++ file.readFully(offset, buffer, 0, buffer.length);
      + ByteBuffer bb = ByteBuffer.wrap(buffer);
    - file.seek(offset);
    - file.readFully(buffer);
      + indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create(null, "index",
      + Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
    - codec, bufferSize));
    ++ codec, bufferSize));
      + if (readBloomFilter) {
      + bb.position((int) stream.getLength());
      + bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
      + null, "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
      + nextStream.getLength(), codec, bufferSize));
      + }
      + }
      + }
      + offset += len;
      + }
      +
      + RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
      + return index;
      + }
      +
      + @Override
      + public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
      + long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
      + int tailLength = (int) stripe.getFooterLength();
    ++
      + // read the footer
      + ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
    - file.seek(offset);
    - file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
    ++ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
      + return OrcProto.StripeFooter.parseFrom(InStream.create(null, "footer",
      + Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
      + tailLength, codec, bufferSize));
      + }
      +
      + @Override
      + public void close() throws IOException {
      + file.close();
      + }
      +}
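
    The readFully(offset, ...) calls in the merged MetadataReaderImpl above are positioned reads, so the reader no longer seeks a shared stream position before each read. A minimal sketch of the same pattern with plain java.nio (FileChannel standing in for FSDataInputStream; the file layout string is made up):

        import java.io.IOException;
        import java.nio.ByteBuffer;
        import java.nio.channels.FileChannel;
        import java.nio.file.Files;
        import java.nio.file.Path;
        import java.nio.file.StandardOpenOption;

        public class PositionedReadSketch {
          // Read 'len' bytes starting at 'offset' without touching the channel's own position,
          // analogous to file.readFully(offset, buffer, 0, buffer.length) in the diff above.
          static byte[] readFullyAt(FileChannel ch, long offset, int len) throws IOException {
            ByteBuffer buf = ByteBuffer.allocate(len);
            while (buf.hasRemaining()) {
              int n = ch.read(buf, offset + buf.position());
              if (n < 0) {
                throw new IOException("EOF before " + len + " bytes were read");
              }
            }
            return buf.array();
          }

          public static void main(String[] args) throws IOException {
            Path p = Files.createTempFile("positioned-read", ".bin");
            Files.write(p, "stripe-index|stripe-data|stripe-footer".getBytes());
            try (FileChannel ch = FileChannel.open(p, StandardOpenOption.READ)) {
              // Two independent positioned reads; neither depends on a prior seek.
              System.out.println(new String(readFullyAt(ch, 0, 12)));  // stripe-index
              System.out.println(new String(readFullyAt(ch, 25, 13))); // stripe-footer
            }
          }
        }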

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/f272ccb2/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
    ----------------------------------------------------------------------
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567

    Branch: refs/heads/llap
    Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
    Parents: 6a8d7e4
    Author: Pengcheng Xiong <pxiong@apache.org>
    Authored: Tue Sep 29 17:47:39 2015 -0700
    Committer: Pengcheng Xiong <pxiong@apache.org>
    Committed: Tue Sep 29 17:47:39 2015 -0700

    ----------------------------------------------------------------------
      .../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
      .../clientpositive/metadata_only_queries.q | 15 ++
      .../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
      .../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
      .../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
      5 files changed, 552 insertions(+), 7 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    index bc8d8f7..5a21e6b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;

      import java.util.ArrayList;
      import java.util.Collection;
    +import java.util.HashMap;
    +import java.util.HashSet;
      import java.util.LinkedHashMap;
      import java.util.List;
      import java.util.Map;
    @@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
                return null;
              }
              Operator<?> last = (Operator<?>) stack.get(5);
    + SelectOperator cselOp = null;
    + Map<Integer,Object> posToConstant = new HashMap<>();
              if (last instanceof SelectOperator) {
    - SelectOperator cselOp = (SelectOperator) last;
    + cselOp = (SelectOperator) last;
                if (!cselOp.isIdentitySelect()) {
    - return null; // todo we can do further by providing operator to fetch task
    + for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
    + ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
    + if (desc instanceof ExprNodeConstantDesc) {
    + // We store the position-to-constant-value mapping for later use.
    + posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
    + } else {
    + if (!(desc instanceof ExprNodeColumnDesc)) {
    + // Probably an expression, can't handle that
    + return null;
    + }
    + }
    + }
                }
                last = (Operator<?>) stack.get(6);
              }
    @@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {


              List<List<Object>> allRows = new ArrayList<List<Object>>();
    - allRows.add(oneRow);
    -
              List<String> colNames = new ArrayList<String>();
              List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    - for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
    - colNames.add(colInfo.getInternalName());
    - ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + if (cselOp == null) {
    + allRows.add(oneRow);
    + for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
    + colNames.add(colInfo.getInternalName());
    + ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + }
    + } else {
    + int aggrPos = 0;
    + List<Object> oneRowWithConstant = new ArrayList<>();
    + for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
    + if (posToConstant.containsKey(pos)) {
    + // This position is a constant.
    + oneRowWithConstant.add(posToConstant.get(pos));
    + } else {
    + // This position is an aggregation.
    + oneRowWithConstant.add(oneRow.get(aggrPos++));
    + }
    + ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
    + colNames.add(colInfo.getInternalName());
    + ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
    + }
    + allRows.add(oneRowWithConstant);
              }
              StandardStructObjectInspector sOI = ObjectInspectorFactory.
                  getStandardStructObjectInspector(colNames, ois);
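
    A stand-alone sketch of the row-building step above: positions that the select list filled with constants come from the position-to-constant map, and every other position takes the next value from the single aggregate row. Names here are hypothetical, not the Hive operator API.

        import java.util.*;

        public class ConstantColumnRowSketch {
          // Interleave constant columns with aggregate values, the way the patched
          // StatsOptimizer fills oneRowWithConstant from posToConstant and oneRow.
          static List<Object> buildRow(int width, Map<Integer, Object> posToConstant,
                                       List<Object> aggRow) {
            List<Object> row = new ArrayList<>(width);
            int aggrPos = 0;
            for (int pos = 0; pos < width; pos++) {
              if (posToConstant.containsKey(pos)) {
                row.add(posToConstant.get(pos));  // constant select expression at this position
              } else {
                row.add(aggRow.get(aggrPos++));   // next aggregate computed from metadata stats
              }
            }
            return row;
          }

          public static void main(String[] args) {
            // select min(i), '1' as one, max(i), 3+4.0 as three -> constants at positions 1 and 3
            Map<Integer, Object> constants = new HashMap<>();
            constants.put(1, "1");
            constants.put(3, 7.0);
            List<Object> aggRow = new ArrayList<>(List.of(65536, 65791));
            System.out.println(buildRow(4, constants, aggRow)); // [65536, 1, 65791, 7.0]
          }
        }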

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
    index 56f3a78..70fac92 100644
    --- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
    +++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
    @@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
      explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;

    +explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
    +explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
    +
      analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
      analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
      analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
    @@ -69,6 +74,12 @@ explain
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;

    +explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
    +
    +
    +
      explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
    @@ -76,6 +87,10 @@ explain
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
      select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;

    +explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
    +
      explain select count(ts) from stats_tbl_part;

      drop table stats_tbl;

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    index 2dcd437..65a4dfa 100644
    --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
    @@ -276,6 +276,114 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -364,6 +472,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -414,6 +547,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    index b2221fc..0d85f4e 100644
    --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
    @@ -288,6 +288,126 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 2 <- Map 1 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    + Edges:
    + Reducer 2 <- Map 1 (GROUP, 1)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part

    http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    index f43440e..ab86ab0 100644
    --- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    +++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    @@ -288,6 +288,126 @@ STAGE PLANS:
            Processor Tree:
              ListSink

    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: stats_tbl_part
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
    + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
    + Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
    + mode: hash
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
      PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
      PREHOOK: type: QUERY
      PREHOOK: Input: default@stats_tbl
    @@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain
      select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
      PREHOOK: type: QUERY
    @@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
      POSTHOOK: Input: default@stats_tbl_part
      #### A masked pattern was here ####
      65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
    +PREHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-0 is a root stage
    +
    +STAGE PLANS:
    + Stage: Stage-0
    + Fetch Operator
    + limit: 1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@stats_tbl_part
    +#### A masked pattern was here ####
    +65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
      PREHOOK: query: explain select count(ts) from stats_tbl_part
      PREHOOK: type: QUERY
      POSTHOOK: query: explain select count(ts) from stats_tbl_part
  • Sershe at Sep 30, 2015 at 7:29 pm
    HIVE-11819 : HiveServer2 catches OOMs on request threads (Sergey Shelukhin, reviewed by Vaibhav Gumashta)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a8d7e4c
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a8d7e4c
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a8d7e4c

    Branch: refs/heads/llap
    Commit: 6a8d7e4cd55e5317aeb5a71005e5c98e09b22cc2
    Parents: e1ce9a2
    Author: Sergey Shelukhin <sershe@apache.org>
    Authored: Tue Sep 29 15:42:23 2015 -0700
    Committer: Sergey Shelukhin <sershe@apache.org>
    Committed: Tue Sep 29 15:42:23 2015 -0700

    ----------------------------------------------------------------------
      .../service/cli/session/HiveSessionProxy.java | 6 +++
      .../thrift/EmbeddedThriftBinaryCLIService.java | 2 +-
      .../thrift/ThreadPoolExecutorWithOomHook.java | 55 ++++++++++++++++++++
      .../cli/thrift/ThriftBinaryCLIService.java | 12 +++--
      .../service/cli/thrift/ThriftCLIService.java | 3 ++
      .../cli/thrift/ThriftHttpCLIService.java | 10 ++--
      .../apache/hive/service/server/HiveServer2.java | 12 +++--
      .../hive/service/auth/TestPlainSaslHelper.java | 2 +-
      .../session/TestPluggableHiveSessionImpl.java | 2 +-
      .../cli/session/TestSessionGlobalInitFile.java | 2 +-
      10 files changed, 90 insertions(+), 16 deletions(-)
    ----------------------------------------------------------------------
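
    The core of this change is a ThreadPoolExecutor subclass whose afterExecute hook runs a
    caller-supplied Runnable when a request thread dies with an OutOfMemoryError; HiveServer2
    passes in a hook that stops the server. The full class and its wiring appear in the diffs
    below. As a rough standalone sketch of the same pattern (not the Hive class itself, and
    using a simulated OOM so it can actually run), it looks like this:

        import java.util.concurrent.*;

        // Minimal standalone sketch of the OOM-hook executor pattern (not Hive code).
        public class OomHookExecutorDemo {

          static class OomHookExecutor extends ThreadPoolExecutor {
            private final Runnable oomHook;

            OomHookExecutor(int threads, Runnable oomHook) {
              super(threads, threads, 0L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
              this.oomHook = oomHook;
            }

            @Override
            protected void afterExecute(Runnable r, Throwable t) {
              super.afterExecute(r, t);
              // Tasks submitted via submit() are wrapped in a FutureTask, so any Throwable
              // is captured inside the Future and t is null here; unwrap it explicitly.
              if (t == null && r instanceof Future<?>) {
                try {
                  if (((Future<?>) r).isDone()) {
                    ((Future<?>) r).get();
                  }
                } catch (InterruptedException ie) {
                  Thread.currentThread().interrupt();
                } catch (ExecutionException ee) {
                  t = ee.getCause();
                }
              }
              if (t instanceof OutOfMemoryError) {
                oomHook.run();   // in HiveServer2 the hook stops the server
              }
            }
          }

          public static void main(String[] args) throws Exception {
            ExecutorService pool =
                new OomHookExecutor(1, () -> System.out.println("OOM hook: stopping server"));
            Runnable failing = () -> { throw new OutOfMemoryError("simulated"); };
            pool.submit(failing);
            pool.shutdown();
            pool.awaitTermination(5, TimeUnit.SECONDS);
          }
        }
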


    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
    index 5b10521..433f14e 100644
    --- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
    +++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java
    @@ -79,6 +79,12 @@ public class HiveSessionProxy implements InvocationHandler {
          } catch (InvocationTargetException e) {
            if (e.getCause() instanceof HiveSQLException) {
              throw (HiveSQLException)e.getCause();
    + } else if (e.getCause() instanceof OutOfMemoryError) {
    + throw (OutOfMemoryError)e.getCause();
    + } else if (e.getCause() instanceof Error) {
    + // TODO: maybe we should throw this as-is too. ThriftCLIService currently catches Exception,
    + // so the combination determines what would kill the HS2 executor thread. For now,
    + // let's only allow OOM to propagate.
            }
            throw new RuntimeException(e.getCause());
          } catch (IllegalArgumentException e) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    index a57fc8f..e9a5830 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    @@ -30,7 +30,7 @@ import org.apache.hive.service.cli.ICLIService;
      public class EmbeddedThriftBinaryCLIService extends ThriftBinaryCLIService {

        public EmbeddedThriftBinaryCLIService() {
    - super(new CLIService(null));
    + super(new CLIService(null), null);
          isEmbedded = true;
          HiveConf.setLoadHiveServer2Config(true);
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java b/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
    new file mode 100644
    index 0000000..51731ad
    --- /dev/null
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThreadPoolExecutorWithOomHook.java
    @@ -0,0 +1,55 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hive.service.cli.thrift;
    +
    +import java.util.concurrent.BlockingQueue;
    +import java.util.concurrent.Future;
    +import java.util.concurrent.ThreadFactory;
    +import java.util.concurrent.ThreadPoolExecutor;
    +import java.util.concurrent.TimeUnit;
    +
    +final class ThreadPoolExecutorWithOomHook extends ThreadPoolExecutor {
    + private final Runnable oomHook;
    +
    + public ThreadPoolExecutorWithOomHook(int corePoolSize, int maximumPoolSize, long keepAliveTime,
    + TimeUnit unit, BlockingQueue<Runnable> workQueue, ThreadFactory threadFactory,
    + Runnable oomHook) {
    + super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory);
    + this.oomHook = oomHook;
    + }
    +
    + @Override
    + protected void afterExecute(Runnable r, Throwable t) {
    + super.afterExecute(r, t);
    + if (t == null && r instanceof Future<?>) {
    + try {
    + Future<?> future = (Future<?>) r;
    + if (future.isDone()) {
    + future.get();
    + }
    + } catch (InterruptedException ie) {
    + Thread.currentThread().interrupt();
    + } catch (Throwable t2) {
    + t = t2;
    + }
    + }
    + if (t instanceof OutOfMemoryError) {
    + oomHook.run();
    + }
    + }
    +}
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    index 6c9efba..54f9914 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    @@ -22,7 +22,6 @@ import java.util.ArrayList;
      import java.util.List;
      import java.util.concurrent.ExecutorService;
      import java.util.concurrent.SynchronousQueue;
    -import java.util.concurrent.ThreadPoolExecutor;
      import java.util.concurrent.TimeUnit;

      import org.apache.hadoop.hive.conf.HiveConf;
    @@ -39,9 +38,11 @@ import org.apache.thrift.transport.TTransportFactory;


      public class ThriftBinaryCLIService extends ThriftCLIService {
    + private final Runnable oomHook;

    - public ThriftBinaryCLIService(CLIService cliService) {
    + public ThriftBinaryCLIService(CLIService cliService, Runnable oomHook) {
          super(cliService, ThriftBinaryCLIService.class.getSimpleName());
    + this.oomHook = oomHook;
        }

        @Override
    @@ -49,9 +50,10 @@ public class ThriftBinaryCLIService extends ThriftCLIService {
          try {
            // Server thread pool
            String threadPoolName = "HiveServer2-Handler-Pool";
    - ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads,
    - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
    - new ThreadFactoryWithGarbageCleanup(threadPoolName));
    + ExecutorService executorService = new ThreadPoolExecutorWithOomHook(minWorkerThreads,
    + maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS,
    + new SynchronousQueue<Runnable>(), new ThreadFactoryWithGarbageCleanup(threadPoolName),
    + oomHook);

            // Thrift configs
            hiveAuthFactory = new HiveAuthFactory(hiveConf);

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
    index 67bc778..1c3e899 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
    @@ -512,6 +512,9 @@ public abstract class ThriftCLIService extends AbstractService implements TCLISe
                resp.setOperationHandle(operationHandle.toTOperationHandle());
                resp.setStatus(OK_STATUS);
          } catch (Exception e) {
    + // Note: it's rather important that this (and other methods) catch Exception, not Throwable;
    + // in combination with HiveSessionProxy.invoke code, perhaps unintentionally, it used
    + // to also catch all errors; and now it allows OOMs only to propagate.
            LOG.warn("Error executing statement: ", e);
            resp.setStatus(HiveSQLException.toTStatus(e));
          }
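
    As the new comment notes, the handler deliberately catches Exception rather than Throwable,
    so an OutOfMemoryError rethrown by HiveSessionProxy is not turned into an error response but
    unwinds the handler thread and reaches the pool's afterExecute hook. A tiny standalone
    illustration of that interplay (simulated OOM, not Hive code):

        // Why catching Exception (not Throwable) matters: ordinary exceptions become an error
        // response, while Errors skip the catch and propagate to the thread pool.
        public class CatchScopeDemo {
          public static void main(String[] args) {
            try {
              try {
                throw new OutOfMemoryError("simulated");   // stands in for the proxied session call
              } catch (Exception e) {
                System.out.println("handled as error response: " + e);  // not reached for Errors
              }
            } catch (Throwable t) {
              // stands in for the executor's afterExecute(Runnable, Throwable)
              System.out.println("propagated to the pool: " + t);
            }
          }
        }
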

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
    index 3b57efa..046958e 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java
    @@ -46,9 +46,11 @@ import org.eclipse.jetty.util.thread.ExecutorThreadPool;


      public class ThriftHttpCLIService extends ThriftCLIService {
    + private final Runnable oomHook;

    - public ThriftHttpCLIService(CLIService cliService) {
    + public ThriftHttpCLIService(CLIService cliService, Runnable oomHook) {
          super(cliService, ThriftHttpCLIService.class.getSimpleName());
    + this.oomHook = oomHook;
        }

        /**
    @@ -65,9 +67,9 @@ public class ThriftHttpCLIService extends ThriftCLIService {
            // Server thread pool
            // Start with minWorkerThreads, expand till maxWorkerThreads and reject subsequent requests
            String threadPoolName = "HiveServer2-HttpHandler-Pool";
    - ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads,
    - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
    - new ThreadFactoryWithGarbageCleanup(threadPoolName));
    + ExecutorService executorService = new ThreadPoolExecutorWithOomHook(minWorkerThreads,
    + maxWorkerThreads, workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
    + new ThreadFactoryWithGarbageCleanup(threadPoolName), oomHook);
            ExecutorThreadPool threadPool = new ExecutorThreadPool(executorService);
            httpServer.setThreadPool(threadPool);


    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/java/org/apache/hive/service/server/HiveServer2.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
    index d7ba964..601c5db 100644
    --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
    +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
    @@ -97,10 +97,17 @@ public class HiveServer2 extends CompositeService {
        public synchronized void init(HiveConf hiveConf) {
          cliService = new CLIService(this);
          addService(cliService);
    + final HiveServer2 hiveServer2 = this;
    + Runnable oomHook = new Runnable() {
    + @Override
    + public void run() {
    + hiveServer2.stop();
    + }
    + };
          if (isHTTPTransportMode(hiveConf)) {
    - thriftCLIService = new ThriftHttpCLIService(cliService);
    + thriftCLIService = new ThriftHttpCLIService(cliService, oomHook);
          } else {
    - thriftCLIService = new ThriftBinaryCLIService(cliService);
    + thriftCLIService = new ThriftBinaryCLIService(cliService, oomHook);
          }
          addService(thriftCLIService);
          super.init(hiveConf);
    @@ -111,7 +118,6 @@ public class HiveServer2 extends CompositeService {
            throw new Error("Unable to intitialize HiveServer2", t);
          }
          // Add a shutdown hook for catching SIGTERM & SIGINT
    - final HiveServer2 hiveServer2 = this;
          Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
    ----------------------------------------------------------------------
    diff --git a/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java b/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
    index 03f3964..8ae0eeb 100644
    --- a/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
    +++ b/service/src/test/org/apache/hive/service/auth/TestPlainSaslHelper.java
    @@ -41,7 +41,7 @@ public class TestPlainSaslHelper extends TestCase {

          CLIService cliService = new CLIService(null);
          cliService.init(hconf);
    - ThriftCLIService tcliService = new ThriftBinaryCLIService(cliService);
    + ThriftCLIService tcliService = new ThriftBinaryCLIService(cliService, null);
          tcliService.init(hconf);
          TProcessorFactory procFactory = PlainSaslHelper.getPlainProcessorFactory(tcliService);
          assertEquals("doAs enabled processor for unsecure mode",

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
    ----------------------------------------------------------------------
    diff --git a/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java b/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
    index 8c7546c..f4bcbc3 100644
    --- a/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
    +++ b/service/src/test/org/apache/hive/service/cli/session/TestPluggableHiveSessionImpl.java
    @@ -26,7 +26,7 @@ public class TestPluggableHiveSessionImpl extends TestCase {
          hiveConf = new HiveConf();
          hiveConf.setVar(HiveConf.ConfVars.HIVE_SESSION_IMPL_CLASSNAME, TestHiveSessionImpl.class.getName());
          cliService = new CLIService(null);
    - service = new ThriftBinaryCLIService(cliService);
    + service = new ThriftBinaryCLIService(cliService, null);
          service.init(hiveConf);
          client = new ThriftCLIServiceClient(service);
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/6a8d7e4c/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
    ----------------------------------------------------------------------
    diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
    index 37b698b..840a551 100644
    --- a/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
    +++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionGlobalInitFile.java
    @@ -52,7 +52,7 @@ public class TestSessionGlobalInitFile extends TestCase {
         */
        private class FakeEmbeddedThriftBinaryCLIService extends ThriftBinaryCLIService {
          public FakeEmbeddedThriftBinaryCLIService(HiveConf hiveConf) {
    - super(new CLIService(null));
    + super(new CLIService(null), null);
            isEmbedded = true;
            cliService.init(hiveConf);
            cliService.start();
  • Sershe at Sep 30, 2015 at 7:30 pm
    HIVE-11945: ORC with non-local reads may not be reusing connection to DN (Rajesh Balamohan reviewed by Sergey Shelukhin, Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4c43f03
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4c43f03
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4c43f03

    Branch: refs/heads/llap
    Commit: a4c43f0335b33a75d2e9f3dc53b3cd33f8f115cf
    Parents: 3921458
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Mon Sep 28 20:10:50 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Mon Sep 28 20:10:50 2015 -0500

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/io/orc/MetadataReader.java | 6 ++----
      .../org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java | 12 +++++-------
      .../apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java | 5 +++--
      3 files changed, 10 insertions(+), 13 deletions(-)
    ----------------------------------------------------------------------
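
    The change itself is mechanical: the seek-then-read pairs in the ORC metadata, footer and
    non-zero-copy data paths are replaced with the positional
    readFully(position, buffer, offset, length) overload of FSDataInputStream, which carries the
    offset with the call instead of moving the stream and, per the issue title, addresses the
    case where non-local reads were not reusing the connection to the DataNode. A standalone
    sketch of the two call shapes (the path and sizes below are placeholders, not from the patch):

        import java.io.IOException;
        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FSDataInputStream;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        // Contrast of the stateful and positional read styles touched by this patch.
        public class PositionalReadSketch {
          public static void main(String[] args) throws IOException {
            FileSystem fs = FileSystem.get(new Configuration());
            Path path = new Path("/tmp/example.orc");   // hypothetical file
            long offset = 0L;
            int len = 1024;
            byte[] buffer = new byte[len];

            // Before: stateful read -- move the stream position, then read at it.
            try (FSDataInputStream in = fs.open(path)) {
              in.seek(offset);
              in.readFully(buffer, 0, len);
            }

            // After: positional read -- the offset travels with the call and the stream
            // position is left untouched; this is the form the patch switches to.
            try (FSDataInputStream in = fs.open(path)) {
              in.readFully(offset, buffer, 0, len);
            }
          }
        }
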


    http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    index cdc0372..43d2933 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
    @@ -82,8 +82,7 @@ public class MetadataReader {
              }
              if ((included == null || included[col]) && indexes[col] == null) {
                byte[] buffer = new byte[len];
    - file.seek(offset);
    - file.readFully(buffer);
    + file.readFully(offset, buffer, 0, buffer.length);
                ByteBuffer[] bb = new ByteBuffer[] {ByteBuffer.wrap(buffer)};
                indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
                    bb, new long[]{0}, stream.getLength(), codec, bufferSize));
    @@ -108,8 +107,7 @@ public class MetadataReader {

          // read the footer
          ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
    - file.seek(offset);
    - file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
    + file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
          return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
              Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
              tailLength, codec, bufferSize));

    http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    index ab539c4..23b3b55 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    @@ -244,9 +244,8 @@ public class ReaderImpl implements Reader {
          if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
            // If it isn't there, this may be the 0.11.0 version of ORC.
            // Read the first 3 bytes of the file to check for the header
    - in.seek(0);
            byte[] header = new byte[len];
    - in.readFully(header, 0, len);
    + in.readFully(0, header, 0, len);
            // if it isn't there, this isn't an ORC file
            if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) {
              throw new FileFormatException("Malformed ORC file " + path +
    @@ -472,10 +471,10 @@ public class ReaderImpl implements Reader {

          //read last bytes into buffer to get PostScript
          int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    - file.seek(size - readSize);
          ByteBuffer buffer = ByteBuffer.allocate(readSize);
          assert buffer.position() == 0;
    - file.readFully(buffer.array(), buffer.arrayOffset(), readSize);
    + file.readFully((size - readSize),
    + buffer.array(), buffer.arrayOffset(), readSize);
          buffer.position(0);

          //read the PostScript
    @@ -495,10 +494,9 @@ public class ReaderImpl implements Reader {
          int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
          if (extra > 0) {
            //more bytes need to be read, seek back to the right place and read extra bytes
    - file.seek(size - readSize - extra);
            ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
    - file.readFully(extraBuf.array(),
    - extraBuf.arrayOffset() + extraBuf.position(), extra);
    + file.readFully((size - readSize - extra), extraBuf.array(),
    + extraBuf.arrayOffset() + extraBuf.position(), extra);
            extraBuf.position(extra);
            //append with already read bytes
            extraBuf.put(buffer);

    http://git-wip-us.apache.org/repos/asf/hive/blob/a4c43f03/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    index ded3979..9c9a1c0 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    @@ -246,8 +246,8 @@ public class RecordReaderUtils {
            }
            int len = (int) (range.getEnd() - range.getOffset());
            long off = range.getOffset();
    - file.seek(base + off);
            if (zcr != null) {
    + file.seek(base + off);
              boolean hasReplaced = false;
              while (len > 0) {
                ByteBuffer partial = zcr.readBuffer(len, false);
    @@ -264,12 +264,13 @@ public class RecordReaderUtils {
                off += read;
              }
            } else if (doForceDirect) {
    + file.seek(base + off);
              ByteBuffer directBuf = ByteBuffer.allocateDirect(len);
              readDirect(file, len, directBuf);
              range = range.replaceSelfWith(new BufferChunk(directBuf, range.getOffset()));
            } else {
              byte[] buffer = new byte[len];
    - file.readFully(buffer, 0, buffer.length);
    + file.readFully((base + off), buffer, 0, buffer.length);
              range = range.replaceSelfWith(new BufferChunk(ByteBuffer.wrap(buffer), range.getOffset()));
            }
            range = range.next;
