FAQ
Author: omalley
Date: Thu Nov 14 19:06:58 2013
New Revision: 1542024

URL: http://svn.apache.org/r1542024
Log:
HIVE-5601: NPE in ORC's PPD when using select * from table with where
predicate pushdown (Prasanth J via Owen O'Malley and Gunther Hagleitner)

Modified:
     hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
     hive/branches/branch-0.12/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
     hive/branches/branch-0.12/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1542024&r1=1542023&r2=1542024&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Thu Nov 14 19:06:58 2013
@@ -18,7 +18,14 @@

  package org.apache.hadoop.hive.ql.io.orc;

-import com.google.protobuf.CodedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
  import org.apache.hadoop.fs.FSDataInputStream;
@@ -28,12 +35,7 @@ import org.apache.hadoop.hive.ql.io.sarg
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.io.Text;

-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.protobuf.CodedInputStream;

  final class ReaderImpl implements Reader {

@@ -332,6 +334,13 @@ final class ReaderImpl implements Reader
    public RecordReader rows(long offset, long length, boolean[] include,
                             SearchArgument sarg, String[] columnNames
                             ) throws IOException {
+
+ // if included columns is null, then include all columns
+ if (include == null) {
+ include = new boolean[footer.getTypesCount()];
+ Arrays.fill(include, true);
+ }
+
      return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
          length, footer.getTypesList(), codec, bufferSize,
          include, footer.getRowIndexStride(), sarg, columnNames);

Modified: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q?rev=1542024&r1=1542023&r2=1542024&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q (original)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/orc_predicate_pushdown.q Thu Nov 14 19:06:58 2013
@@ -49,6 +49,16 @@ SET hive.optimize.index.filter=false;
  -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
  -- in table scan operator.

+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t<2 limit 1;
+SET hive.optimize.index.filter=false;
+
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=true;
+SELECT * FROM orc_pred WHERE t>2 limit 1;
+SET hive.optimize.index.filter=false;
+
  SELECT SUM(HASH(t)) FROM orc_pred
    WHERE t IS NOT NULL
    AND t < 0

Modified: hive/branches/branch-0.12/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out?rev=1542024&r1=1542023&r2=1542024&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out (original)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out Thu Nov 14 19:06:58 2013
@@ -275,10 +275,7 @@ PREHOOK: query: -- all the following que
  -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
  -- in table scan operator.

-SELECT SUM(HASH(t)) FROM orc_pred
- WHERE t IS NOT NULL
- AND t < 0
- AND t > -2
+SELECT * FROM orc_pred WHERE t<2 limit 1
  PREHOOK: type: QUERY
  PREHOOK: Input: default@orc_pred
  #### A masked pattern was here ####
@@ -286,7 +283,90 @@ POSTHOOK: query: -- all the following qu
  -- hive.optimize.index.filter is set to true. the explain plan should show filter expression
  -- in table scan operator.

-SELECT SUM(HASH(t)) FROM orc_pred
+SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t<2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+-3 467 65575 4294967437 81.64 23.53 true tom hernandez 2013-03-01 09:11:58.703188 32.85 study skills
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_pred WHERE t>2 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_pred.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: orc_pred.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ]
+POSTHOOK: Lineage: orc_pred.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ]
+POSTHOOK: Lineage: orc_pred.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal, comment:null), ]
+POSTHOOK: Lineage: orc_pred.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ]
+POSTHOOK: Lineage: orc_pred.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_pred.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_pred.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: orc_pred.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ]
+124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty
+PREHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
+ WHERE t IS NOT NULL
+ AND t < 0
+ AND t > -2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_pred
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(t)) FROM orc_pred
    WHERE t IS NOT NULL
    AND t < 0
    AND t > -2

Search Discussions

Related Discussions

Discussion Navigation
view thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Nov 14, '13 at 7:07p
active: Nov 14, '13 at 7:07p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Omalley: 1 post

People

Translate

site design / logo © 2021 Grokbase