FAQ
Repository: hive
Updated Branches:
   refs/heads/master 6f828383d -> d4f274867


HIVE-13200: Aggregation functions returning empty rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d4f27486
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d4f27486
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d4f27486

Branch: refs/heads/master
Commit: d4f2748670e8c14a50066e17148c4f2d8cd0bc39
Parents: 6f82838
Author: Yongzhi Chen <ychena@apache.org>
Authored: Thu Mar 3 11:55:37 2016 -0500
Committer: Yongzhi Chen <ychena@apache.org>
Committed: Sat Mar 5 10:56:50 2016 -0500

----------------------------------------------------------------------
  .../physical/MetadataOnlyOptimizer.java | 3 +-
  .../hadoop/hive/ql/plan/TableScanDesc.java | 16 ++
  .../test/queries/clientpositive/skiphf_aggr.q | 42 +++
  .../results/clientpositive/skiphf_aggr.q.out | 267 +++++++++++++++++++
  4 files changed, 327 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
index d47d3c2..5758282 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
@@ -119,7 +119,8 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver {
        boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
        boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
(desc.getVirtualCols().isEmpty());
- if (noColNeeded && noVCneeded) {
+ boolean isSkipHF = desc.isNeedSkipHeaderFooters();
+ if (noColNeeded && noVCneeded && !isSkipHF) {
          walkerCtx.setMayBeMetadataOnly(tsOp);
        }
        return nd;

http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 5381247..8cf261d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
  import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
  import org.apache.hadoop.hive.ql.parse.TableSample;
  import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.serde.serdeConstants;


  /**
@@ -364,4 +365,19 @@ public class TableScanDesc extends AbstractOperatorDesc {
    public void setNumBuckets(int numBuckets) {
      this.numBuckets = numBuckets;
    }
+
+ public boolean isNeedSkipHeaderFooters() {
+ boolean rtn = false;
+ if (tableMetadata != null && tableMetadata.getTTable() != null) {
+ Map<String, String> params = tableMetadata.getTTable().getParameters();
+ if (params != null) {
+ String skipHVal = params.get(serdeConstants.HEADER_COUNT);
+ int hcount = skipHVal == null? 0 : Integer.parseInt(skipHVal);
+ String skipFVal = params.get(serdeConstants.FOOTER_COUNT);
+ int fcount = skipFVal == null? 0 : Integer.parseInt(skipFVal);
+ rtn = (hcount != 0 || fcount !=0 );
+ }
+ }
+ return rtn;
+ }
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/test/queries/clientpositive/skiphf_aggr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr.q b/ql/src/test/queries/clientpositive/skiphf_aggr.q
new file mode 100644
index 0000000..fcd0b35
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skiphf_aggr.q
@@ -0,0 +1,42 @@
+DROP TABLE IF EXISTS skipHTbl;
+
+CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1');
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipHTbl;
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MAX(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MIN(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+DROP TABLE IF EXISTS skipFTbl;
+
+CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1');
+
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipFTbl;
+
+SELECT DISTINCT b FROM skipFTbl;
+SELECT MAX(b) FROM skipFTbl;
+SELECT DISTINCT a FROM skipFTbl;
+
+DROP TABLE skipHTbl;
+DROP TABLE skipFTbl;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/test/results/clientpositive/skiphf_aggr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skiphf_aggr.q.out b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
new file mode 100644
index 0000000..aeb4b1b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
@@ -0,0 +1,267 @@
+PREHOOK: query: DROP TABLE IF EXISTS skipHTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipHTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipHTbl
+POSTHOOK: query: CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipHTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2 1
+3 1
+4 1
+2 2
+3 2
+4 2
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT MIN(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: DROP TABLE IF EXISTS skipFTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipFTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipFTbl
+POSTHOOK: query: CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipFTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@skipftbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@skipftbl@b=1
+POSTHOOK: Lineage: skipftbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@skipftbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@skipftbl@b=2
+POSTHOOK: Lineage: skipftbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+1 2
+2 2
+3 2
+PREHOOK: query: SELECT DISTINCT b FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE skipHTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Output: default@skiphtbl
+POSTHOOK: query: DROP TABLE skipHTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Output: default@skiphtbl
+PREHOOK: query: DROP TABLE skipFTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skipftbl
+PREHOOK: Output: default@skipftbl
+POSTHOOK: query: DROP TABLE skipFTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Output: default@skipftbl

Search Discussions

Discussion Posts

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 2 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedMar 5, '16 at 3:58p
activeMar 5, '16 at 4:10p
posts2
users1
websitehive.apache.org

1 user in discussion

Ychena: 2 posts

People

Translate

site design / logo © 2021 Grokbase