FAQ
Repository: hive
Updated Branches:
   refs/heads/master bcbd245c8 -> 0bab072d1


HIVE-12444 - Global Limit optimization on ACID table without base directory may throw exception (Wei Zheng via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0bab072d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0bab072d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0bab072d

Branch: refs/heads/master
Commit: 0bab072d17df5022ee6262be68d0442f206e398b
Parents: bcbd245
Author: Eugene Koifman <ekoifman@hortonworks.com>
Authored: Wed Dec 2 12:37:31 2015 -0800
Committer: Eugene Koifman <ekoifman@hortonworks.com>
Committed: Wed Dec 2 12:37:31 2015 -0800

----------------------------------------------------------------------
  .../test/resources/testconfiguration.properties | 3 +-
  .../hive/ql/optimizer/GenMapRedUtils.java | 43 ++++++++++++--------
  2 files changed, 27 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0bab072d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 935fd28..8db4a9f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -68,7 +68,8 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
    rcfile_merge1.q,\
    smb_mapjoin_8.q

-minitez.query.files.shared=alter_merge_2_orc.q,\
+minitez.query.files.shared=acid_globallimit.q,\
+ alter_merge_2_orc.q,\
    alter_merge_orc.q,\
    alter_merge_stats_orc.q,\
    auto_join0.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0bab072d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index ecdaa55..0cd7b62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -489,6 +489,7 @@ public final class GenMapRedUtils {
        HiveConf conf, boolean local) throws SemanticException {
      ArrayList<Path> partDir = new ArrayList<Path>();
      ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
+ boolean isAcidTable = false;

      Path tblDir = null;
      plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
@@ -497,6 +498,7 @@ public final class GenMapRedUtils {
        try {
          TableScanOperator tsOp = (TableScanOperator) topOp;
          partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
+ isAcidTable = ((TableScanOperator) topOp).getConf().isAcidTable();
        } catch (SemanticException e) {
          throw e;
        }
@@ -536,26 +538,31 @@ public final class GenMapRedUtils {
      long sizeNeeded = Integer.MAX_VALUE;
      int fileLimit = -1;
      if (parseCtx.getGlobalLimitCtx().isEnable()) {
- long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
- HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
- sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
- // for the optimization that reduce number of input file, we limit number
- // of files allowed. If more than specific number of files have to be
- // selected, we skip this optimization. Since having too many files as
- // inputs can cause unpredictable latency. It's not necessarily to be
- // cheaper.
- fileLimit =
- HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
-
- if (sizePerRow <= 0 || fileLimit <= 0) {
- LOG.info("Skip optimization to reduce input size of 'limit'");
+ if (isAcidTable) {
+ LOG.info("Skip Global Limit optimization for ACID table");
          parseCtx.getGlobalLimitCtx().disableOpt();
- } else if (parts.isEmpty()) {
- LOG.info("Empty input: skip limit optimiztion");
        } else {
- LOG.info("Try to reduce input size for 'limit' " +
- "sizeNeeded: " + sizeNeeded +
- " file limit : " + fileLimit);
+ long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
+ HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
+ sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
+ // for the optimization that reduce number of input file, we limit number
+ // of files allowed. If more than specific number of files have to be
+ // selected, we skip this optimization. Since having too many files as
+ // inputs can cause unpredictable latency. It's not necessarily to be
+ // cheaper.
+ fileLimit =
+ HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
+
+ if (sizePerRow <= 0 || fileLimit <= 0) {
+ LOG.info("Skip optimization to reduce input size of 'limit'");
+ parseCtx.getGlobalLimitCtx().disableOpt();
+ } else if (parts.isEmpty()) {
+ LOG.info("Empty input: skip limit optimiztion");
+ } else {
+ LOG.info("Try to reduce input size for 'limit' " +
+ "sizeNeeded: " + sizeNeeded +
+ " file limit : " + fileLimit);
+ }
        }
      }
      boolean isFirstPart = true;

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Dec 2, '15 at 8:37p
active: Dec 2, '15 at 8:37p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ekoifman: 1 post

People

Translate

site design / logo © 2021 Grokbase