FAQ
Author: hashutosh
Date: Mon Apr 13 18:10:37 2015
New Revision: 1673250

URL: http://svn.apache.org/r1673250
Log:
HIVE-10315 : CBO (Calcite Return Path): HiveRelSize accessing columns without available stats [CBO branch] (Jesus Camacho Rodriguez)

Modified:
     hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
     hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java Mon Apr 13 18:10:37 2015
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.stats.S

  import com.google.common.collect.ImmutableList;
  import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;

  public class RelOptHiveTable extends RelOptAbstractTable {
    private final Table hiveTblMetadata;
@@ -259,7 +260,7 @@ public class RelOptHiveTable extends Rel
      }
    }

- private void updateColStats(Set<Integer> projIndxLst) {
+ private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
      List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
      List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
      List<String> partColNamesThatRqrStats = new ArrayList<String>();
@@ -372,9 +373,13 @@ public class RelOptHiveTable extends Rel
      if (!colNamesFailedStats.isEmpty()) {
        String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: "
            + getColNamesForLogging(colNamesFailedStats);
- LOG.error(logMsg);
        noColsMissingStats.getAndAdd(colNamesFailedStats.size());
- throw new RuntimeException(logMsg);
+ if (allowNullColumnForMissingStats) {
+ LOG.warn(logMsg);
+ } else {
+ LOG.error(logMsg);
+ throw new RuntimeException(logMsg);
+ }
      }
    }

@@ -387,10 +392,14 @@ public class RelOptHiveTable extends Rel
    }

    public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
- ImmutableList.Builder<ColStatistics> colStatsBldr = ImmutableList.<ColStatistics> builder();
+ return getColStat(projIndxLst, false);
+ }
+
+ public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
+ List<ColStatistics> colStatsBldr = Lists.newArrayList();

      if (projIndxLst != null) {
- updateColStats(new HashSet<Integer>(projIndxLst));
+ updateColStats(new HashSet<Integer>(projIndxLst), allowNullColumnForMissingStats);
        for (Integer i : projIndxLst) {
          colStatsBldr.add(hiveColStatsMap.get(i));
        }
@@ -399,13 +408,13 @@ public class RelOptHiveTable extends Rel
        for (Integer i = 0; i < noOfNonVirtualCols; i++) {
          pILst.add(i);
        }
- updateColStats(new HashSet<Integer>(pILst));
+ updateColStats(new HashSet<Integer>(pILst), allowNullColumnForMissingStats);
        for (Integer pi : pILst) {
          colStatsBldr.add(hiveColStatsMap.get(pi));
        }
      }

- return colStatsBldr.build();
+ return colStatsBldr;
    }

    /*

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java Mon Apr 13 18:10:37 2015
@@ -17,9 +17,7 @@
   */
  package org.apache.hadoop.hive.ql.optimizer.calcite.stats;

-import java.util.HashSet;
  import java.util.List;
-import java.util.Set;

  import org.apache.calcite.rel.RelNode;
  import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
@@ -54,20 +52,19 @@ public class HiveRelMdSize extends RelMd

    public List<Double> averageColumnSizes(HiveTableScan scan) {
      List<Integer> neededcolsLst = scan.getNeededColIndxsFrmReloptHT();
- Set<Integer> needColsSet = new HashSet<Integer>(neededcolsLst);
      List<ColStatistics> columnStatistics = ((RelOptHiveTable) scan.getTable())
- .getColStat(neededcolsLst);
+ .getColStat(neededcolsLst, true);

      // Obtain list of col stats, or use default if they are not available
      final ImmutableList.Builder<Double> list = ImmutableList.builder();
      int indxRqdCol = 0;
      int nFields = scan.getRowType().getFieldCount();
      for (int i = 0; i < nFields; i++) {
- if (needColsSet.contains(i)) {
+ if (neededcolsLst.contains(i)) {
          ColStatistics columnStatistic = columnStatistics.get(indxRqdCol);
          indxRqdCol++;
          if (columnStatistic == null) {
- RelDataTypeField field = scan.getPrunedRowType().getFieldList().get(i);
+ RelDataTypeField field = scan.getRowType().getFieldList().get(i);
            list.add(averageTypeValueSize(field.getType()));
          } else {
            list.add(columnStatistic.getAvgColLen());

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Apr 13, '15 at 6:10p
active: Apr 13, '15 at 6:10p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Hashutosh: 1 post

People

Translate

site design / logo © 2021 Grokbase