FAQ
Author: gopalv
Date: Fri Jan 16 23:19:43 2015
New Revision: 1652543

URL: http://svn.apache.org/r1652543
Log:
HIVE-9255: SimpleFetchOptimizer for limited fetches without filters. (Gopal V, reviewed by Ashutosh Chauhan)

Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
     hive/trunk/ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q
     hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java?rev=1652543&r1=1652542&r2=1652543&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java Fri Jan 16 23:19:43 2015
@@ -143,8 +143,15 @@ public class SimpleFetchOptimizer implem
    }

    private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
- if (limit > 0 && data.hasOnlyPruningFilter()) {
- return true;
+ if (limit > 0) {
+ if (data.hasOnlyPruningFilter()) {
+ /* partitioned table + query has only pruning filters */
+ return true;
+ } else if (data.isPartitioned() == false && data.isFiltered() == false) {
+ /* unpartitioned table + no filters */
+ return true;
+ }
+ /* fall through */
      }
      long threshold = HiveConf.getLongVar(pctx.getConf(),
          HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
@@ -228,6 +235,10 @@ public class SimpleFetchOptimizer implem
        if (op.getChildOperators() == null || op.getChildOperators().size() != 1) {
          return null;
        }
+
+ if (op instanceof FilterOperator) {
+ fetch.setFiltered(true);
+ }
      }

      if (op instanceof FileSinkOperator) {
@@ -279,6 +290,11 @@ public class SimpleFetchOptimizer implem
operator instanceof ScriptOperator) {
        return false;
      }
+
+ if (operator instanceof FilterOperator) {
+ fetch.setFiltered(true);
+ }
+
      if (!traversed.add(operator)) {
        return true;
      }
@@ -314,6 +330,7 @@ public class SimpleFetchOptimizer implem

      // this is always non-null when conversion is completed
      private Operator<?> fileSink;
+ private boolean filtered;

      private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) {
        this.scanOp = scanOp;
@@ -337,10 +354,23 @@ public class SimpleFetchOptimizer implem
      /*
       * all filters were executed during partition pruning
       */
- public boolean hasOnlyPruningFilter() {
+ public final boolean hasOnlyPruningFilter() {
        return this.onlyPruningFilter;
      }

+ public final boolean isPartitioned() {
+ return this.table.isPartitioned();
+ }
+
+ /* there are filter operators in the pipeline */
+ public final boolean isFiltered() {
+ return this.filtered;
+ }
+
+ public final void setFiltered(boolean filtered) {
+ this.filtered = filtered;
+ }
+
      private FetchWork convertToWork() throws HiveException {
        inputs.clear();
        if (!table.isPartitioned()) {

Modified: hive/trunk/ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q?rev=1652543&r1=1652542&r2=1652543&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q Fri Jan 16 23:19:43 2015
@@ -7,9 +7,13 @@ set hive.fetch.task.conversion.threshold

  explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
  explain select cast(key as int) * 10, upper(value) from src limit 10;
+-- Scans without limit (should be Fetch task now)
+explain select concat(key, value) from src;

  set hive.fetch.task.conversion.threshold=100;

  -- from HIVE-7397, limit + partition pruning filter
  explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10;
  explain select cast(key as int) * 10, upper(value) from src limit 10;
+-- Scans without limit (should not be Fetch task now)
+explain select concat(key, value) from src;

Modified: hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out?rev=1652543&r1=1652542&r2=1652543&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out Fri Jan 16 23:19:43 2015
@@ -94,6 +94,29 @@ STAGE PLANS:
                Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                ListSink

+PREHOOK: query: -- Scans without limit (should be Fetch task now)
+explain select concat(key, value) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Scans without limit (should be Fetch task now)
+explain select concat(key, value) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: concat(key, value) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
  PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter
  explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10
  PREHOOK: type: QUERY
@@ -125,6 +148,32 @@ PREHOOK: type: QUERY
  POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10
  POSTHOOK: type: QUERY
  STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: -- Scans without limit (should not be Fetch task now)
+explain select concat(key, value) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Scans without limit (should not be Fetch task now)
+explain select concat(key, value) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
    Stage-1 is a root stage
    Stage-0 depends on stages: Stage-1

@@ -136,23 +185,20 @@ STAGE PLANS:
              alias: src
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Select Operator
- expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string)
- outputColumnNames: _col0, _col1
+ expressions: concat(key, value) (type: string)
+ outputColumnNames: _col0
                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

    Stage: Stage-0
      Fetch Operator
- limit: 10
+ limit: -1
        Processor Tree:
          ListSink

Search Discussions

Related Discussions

Discussion Navigation
view thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jan 16, '15 at 11:19p
active: Jan 16, '15 at 11:19p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Gopalv: 1 post

People

Translate

site design / logo © 2021 Grokbase