FAQ
Repository: hive
Updated Branches:
   refs/heads/master 648f2c6bd -> 152813517


HIVE-11926: Stats annotation might not extract stats for varchar/decimal columns (Chaoyu Tang, reviewed by Xuefu Zhang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15281351
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15281351
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15281351

Branch: refs/heads/master
Commit: 1528135176df0bb30351471eb05b919d706669b8
Parents: 648f2c6
Author: ctang <ctang.ma@gmail.com>
Authored: Thu Sep 24 12:20:42 2015 -0400
Committer: ctang <ctang.ma@gmail.com>
Committed: Thu Sep 24 14:43:32 2015 -0400

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/stats/StatsUtils.java | 182 ++++++++++---------
  1 file changed, 94 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/15281351/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 2c970bd..cc8c9e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -429,10 +429,11 @@ public class StatsUtils {
        String colType, String defaultPartName) {
      Range range = null;
      String partVal;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
        for (Partition partition : partitions) {
@@ -447,8 +448,8 @@ public class StatsUtils {
          }
        }
        range = new Range(min, max);
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
        double min = Double.MAX_VALUE;
        double max = Double.MIN_VALUE;
        for (Partition partition : partitions) {
@@ -463,7 +464,7 @@ public class StatsUtils {
          }
        }
        range = new Range(min, max);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
        double min = Double.MAX_VALUE;
        double max = Double.MIN_VALUE;
        for (Partition partition : partitions) {
@@ -515,18 +516,18 @@ public class StatsUtils {
          continue;
        }
        ObjectInspector oi = ci.getObjectInspector();
- String colType = ci.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
- || colType.startsWith(serdeConstants.MAP_TYPE_NAME)
- || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME)
- || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) {
- avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = ci.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+ avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
        } else {
- avgRowSize += getAvgColLenOfFixedLengthTypes(colType);
+ avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
        }
      }
      return avgRowSize;
@@ -640,38 +641,38 @@ public class StatsUtils {
     */
    public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
        String colName) {
- ColStatistics cs = new ColStatistics(colName, cso.getColType());
- String colType = cso.getColType();
+ String colTypeLowerCase = cso.getColType().toLowerCase();
+ ColStatistics cs = new ColStatistics(colName, colTypeLowerCase);
      ColumnStatisticsData csd = cso.getStatsData();
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
        cs.setCountDistint(csd.getLongStats().getNumDVs());
        cs.setNumNulls(csd.getLongStats().getNumNulls());
        cs.setAvgColLen(JavaDataModel.get().primitive1());
        cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
        cs.setCountDistint(csd.getLongStats().getNumDVs());
        cs.setNumNulls(csd.getLongStats().getNumNulls());
        cs.setAvgColLen(JavaDataModel.get().primitive2());
        cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
        cs.setCountDistint(csd.getDoubleStats().getNumDVs());
        cs.setNumNulls(csd.getDoubleStats().getNumNulls());
        cs.setAvgColLen(JavaDataModel.get().primitive1());
        cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
        cs.setCountDistint(csd.getDoubleStats().getNumDVs());
        cs.setNumNulls(csd.getDoubleStats().getNumNulls());
        cs.setAvgColLen(JavaDataModel.get().primitive2());
        cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
        cs.setCountDistint(csd.getStringStats().getNumDVs());
        cs.setNumNulls(csd.getStringStats().getNumNulls());
        cs.setAvgColLen(csd.getStringStats().getAvgColLen());
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
        if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
          cs.setCountDistint(2);
        } else {
@@ -681,12 +682,12 @@ public class StatsUtils {
        cs.setNumFalses(csd.getBooleanStats().getNumFalses());
        cs.setNumNulls(csd.getBooleanStats().getNumNulls());
        cs.setAvgColLen(JavaDataModel.get().primitive1());
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
        cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
        cs.setNumNulls(csd.getBinaryStats().getNumNulls());
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
        cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
        cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
        cs.setCountDistint(csd.getDecimalStats().getNumDVs());
        cs.setNumNulls(csd.getDecimalStats().getNumNulls());
@@ -697,7 +698,7 @@ public class StatsUtils {
        BigDecimal minVal = HiveDecimal.
            create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
        cs.setRange(minVal, maxVal);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
      } else {
        // Columns statistics for complex datatypes are not supported yet
@@ -741,7 +742,9 @@ public class StatsUtils {
      List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
      for (ColumnStatisticsObj statObj : colStats) {
        ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
- stats.add(cs);
+ if (cs != null) {
+ stats.add(cs);
+ }
      }
      return stats;
    }
@@ -776,8 +779,8 @@ public class StatsUtils {
        String colType) {

      long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
-
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
+ String colTypeLowCase = colType.toLowerCase();
+ if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {

        // constant string projection Ex: select "hello" from table
        if (oi instanceof ConstantObjectInspector) {
@@ -793,7 +796,7 @@ public class StatsUtils {
          // return the variable length from config
          return configVarLen;
        }
- } else if (colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {

        // constant varchar projection
        if (oi instanceof ConstantObjectInspector) {
@@ -806,7 +809,7 @@ public class StatsUtils {
          VarcharTypeInfo type = (VarcharTypeInfo) ((HiveVarcharObjectInspector) oi).getTypeInfo();
          return type.getLength();
        }
- } else if (colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {

        // constant char projection
        if (oi instanceof ConstantObjectInspector) {
@@ -819,7 +822,7 @@ public class StatsUtils {
          CharTypeInfo type = (CharTypeInfo) ((HiveCharObjectInspector) oi).getTypeInfo();
          return type.getLength();
        }
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {

        // constant byte arrays
        if (oi instanceof ConstantObjectInspector) {
@@ -858,17 +861,17 @@ public class StatsUtils {

      switch (oi.getCategory()) {
      case PRIMITIVE:
- String colType = oi.getTypeName();
- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ String colTypeLowerCase = oi.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
          result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
- int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
+ int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
          result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
        } else {
- result += getAvgColLenOfFixedLengthTypes(colType);
+ result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
        }
        break;
      case LIST:
@@ -952,21 +955,22 @@ public class StatsUtils {
     * @return raw data size
     */
    public static long getAvgColLenOfFixedLengthTypes(String colType) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
        return JavaDataModel.get().primitive1();
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
        return JavaDataModel.get().primitive2();
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
        return JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        return JavaDataModel.get().lengthOfDate();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
        return JavaDataModel.get().lengthOfDecimal();
      } else {
        return 0;
@@ -982,25 +986,26 @@ public class StatsUtils {
     * @return raw data size
     */
    public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
+ String colTypeLowerCase = colType.toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
        return JavaDataModel.get().lengthForIntArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
        return JavaDataModel.get().lengthForDoubleArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase("long")) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals("long")) {
        return JavaDataModel.get().lengthForLongArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
        return JavaDataModel.get().lengthForByteArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
        return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
        return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        return JavaDataModel.get().lengthForDateArrayOfSize(length);
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
        return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
      } else {
        return 0;
@@ -1267,8 +1272,9 @@ public class StatsUtils {
        throw new IllegalArgumentException("not supported expr type " + end.getClass());
      }

- if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
+ colType = colType.toLowerCase();
+ if (colType.equals(serdeConstants.STRING_TYPE_NAME)
+ || colType.equals(serdeConstants.BINARY_TYPE_NAME)
          || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
          || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
          || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
@@ -1380,30 +1386,30 @@ public class StatsUtils {

      for (ColStatistics cs : colStats) {
        if (cs != null) {
- String colType = cs.getColumnType();
+ String colTypeLowerCase = cs.getColumnType().toLowerCase();
          long nonNullCount = numRows - cs.getNumNulls();
          double sizeOf = 0;
- if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
- || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+ if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
            sizeOf = cs.getAvgColLen();
- } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
- || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
            int acl = (int) Math.round(cs.getAvgColLen());
            sizeOf = JavaDataModel.get().lengthForStringOfLength(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
            int acl = (int) Math.round(cs.getAvgColLen());
            sizeOf = JavaDataModel.get().lengthForByteArrayOfSize(acl);
- } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            sizeOf = JavaDataModel.get().lengthOfTimestamp();
- } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+ } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            sizeOf = JavaDataModel.get().lengthOfDecimal();
- } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+ } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            sizeOf = JavaDataModel.get().lengthOfDate();
          } else {
            sizeOf = cs.getAvgColLen();

Search Discussions

  • Ctang at Sep 24, 2015 at 6:48 pm
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 82368b64a -> 232793bea


    HIVE-11926: Stats annotation might not extract stats for varchar/decimal columns (Chaoyu Tang, reviewed by Xuefu Zhang)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/232793be
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/232793be
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/232793be

    Branch: refs/heads/branch-1
    Commit: 232793beabd1d55f865aa8ec8ba0ab4780b5a5e6
    Parents: 82368b6
    Author: ctang <ctang.ma@gmail.com>
    Authored: Thu Sep 24 12:20:42 2015 -0400
    Committer: ctang <ctang.ma@gmail.com>
    Committed: Thu Sep 24 14:44:48 2015 -0400

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/stats/StatsUtils.java | 182 ++++++++++---------
      1 file changed, 94 insertions(+), 88 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/232793be/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    index a8a2091..f67ddf1 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    @@ -430,10 +430,11 @@ public class StatsUtils {
            String colType, String defaultPartName) {
          Range range = null;
          String partVal;
    - if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
    + String colTypeLowerCase = colType.toLowerCase();
    + if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
            long min = Long.MAX_VALUE;
            long max = Long.MIN_VALUE;
            for (Partition partition : partitions) {
    @@ -448,8 +449,8 @@ public class StatsUtils {
              }
            }
            range = new Range(min, max);
    - } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
            double min = Double.MAX_VALUE;
            double max = Double.MIN_VALUE;
            for (Partition partition : partitions) {
    @@ -464,7 +465,7 @@ public class StatsUtils {
              }
            }
            range = new Range(min, max);
    - } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    + } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            double min = Double.MAX_VALUE;
            double max = Double.MIN_VALUE;
            for (Partition partition : partitions) {
    @@ -516,18 +517,18 @@ public class StatsUtils {
              continue;
            }
            ObjectInspector oi = ci.getObjectInspector();
    - String colType = ci.getTypeName();
    - if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
    - || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    - || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
    - || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
    - || colType.startsWith(serdeConstants.MAP_TYPE_NAME)
    - || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME)
    - || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) {
    - avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colType);
    + String colTypeLowerCase = ci.getTypeName().toLowerCase();
    + if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
    + avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
            } else {
    - avgRowSize += getAvgColLenOfFixedLengthTypes(colType);
    + avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
            }
          }
          return avgRowSize;
    @@ -641,38 +642,38 @@ public class StatsUtils {
         */
        public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName,
            String colName) {
    - ColStatistics cs = new ColStatistics(colName, cso.getColType());
    - String colType = cso.getColType();
    + String colTypeLowerCase = cso.getColType().toLowerCase();
    + ColStatistics cs = new ColStatistics(colName, colTypeLowerCase);
          ColumnStatisticsData csd = cso.getStatsData();
    - if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) {
    + if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
            cs.setCountDistint(csd.getLongStats().getNumDVs());
            cs.setNumNulls(csd.getLongStats().getNumNulls());
            cs.setAvgColLen(JavaDataModel.get().primitive1());
            cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
    - } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
            cs.setCountDistint(csd.getLongStats().getNumDVs());
            cs.setNumNulls(csd.getLongStats().getNumNulls());
            cs.setAvgColLen(JavaDataModel.get().primitive2());
            cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
    - } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
            cs.setCountDistint(csd.getDoubleStats().getNumDVs());
            cs.setNumNulls(csd.getDoubleStats().getNumNulls());
            cs.setAvgColLen(JavaDataModel.get().primitive1());
            cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
    - } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
            cs.setCountDistint(csd.getDoubleStats().getNumDVs());
            cs.setNumNulls(csd.getDoubleStats().getNumNulls());
            cs.setAvgColLen(JavaDataModel.get().primitive2());
            cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
    - } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
    - || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
    - || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
            cs.setCountDistint(csd.getStringStats().getNumDVs());
            cs.setNumNulls(csd.getStringStats().getNumNulls());
            cs.setAvgColLen(csd.getStringStats().getAvgColLen());
    - } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
            if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
              cs.setCountDistint(2);
            } else {
    @@ -682,12 +683,12 @@ public class StatsUtils {
            cs.setNumFalses(csd.getBooleanStats().getNumFalses());
            cs.setNumNulls(csd.getBooleanStats().getNumNulls());
            cs.setAvgColLen(JavaDataModel.get().primitive1());
    - } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
            cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
            cs.setNumNulls(csd.getBinaryStats().getNumNulls());
    - } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
    - } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    + } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
            cs.setCountDistint(csd.getDecimalStats().getNumDVs());
            cs.setNumNulls(csd.getDecimalStats().getNumNulls());
    @@ -698,7 +699,7 @@ public class StatsUtils {
            BigDecimal minVal = HiveDecimal.
                create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
            cs.setRange(minVal, maxVal);
    - } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
          } else {
            // Columns statistics for complex datatypes are not supported yet
    @@ -739,7 +740,9 @@ public class StatsUtils {
          List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
          for (ColumnStatisticsObj statObj : colStats) {
            ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
    - stats.add(cs);
    + if (cs != null) {
    + stats.add(cs);
    + }
          }
          return stats;
        }
    @@ -774,8 +777,8 @@ public class StatsUtils {
            String colType) {

          long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH);
    -
    - if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) {
    + String colTypeLowCase = colType.toLowerCase();
    + if (colTypeLowCase.equals(serdeConstants.STRING_TYPE_NAME)) {

            // constant string projection Ex: select "hello" from table
            if (oi instanceof ConstantObjectInspector) {
    @@ -801,7 +804,7 @@ public class StatsUtils {
              // return the variable length from config
              return configVarLen;
            }
    - } else if (colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
    + } else if (colTypeLowCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {

            // constant varchar projection
            if (oi instanceof ConstantObjectInspector) {
    @@ -821,7 +824,7 @@ public class StatsUtils {
            } else if (oi instanceof WritableHiveVarcharObjectInspector) {
              return ((WritableHiveVarcharObjectInspector) oi).getMaxLength();
            }
    - } else if (colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
    + } else if (colTypeLowCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {

            // constant char projection
            if (oi instanceof ConstantObjectInspector) {
    @@ -841,7 +844,7 @@ public class StatsUtils {
            } else if (oi instanceof WritableHiveCharObjectInspector) {
              return ((WritableHiveCharObjectInspector) oi).getMaxLength();
            }
    - } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
    + } else if (colTypeLowCase.equals(serdeConstants.BINARY_TYPE_NAME)) {

            // constant byte arrays
            if (oi instanceof ConstantObjectInspector) {
    @@ -890,17 +893,17 @@ public class StatsUtils {

          switch (oi.getCategory()) {
          case PRIMITIVE:
    - String colType = oi.getTypeName();
    - if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
    - || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    - || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
    - int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
    + String colTypeLowerCase = oi.getTypeName().toLowerCase();
    + if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
    + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
              result += JavaDataModel.get().lengthForStringOfLength(avgColLen);
    - } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
    - int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType);
    + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
    + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase);
              result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen);
            } else {
    - result += getAvgColLenOfFixedLengthTypes(colType);
    + result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
            }
            break;
          case LIST:
    @@ -984,21 +987,22 @@ public class StatsUtils {
         * @return raw data size
         */
        public static long getAvgColLenOfFixedLengthTypes(String colType) {
    - if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
    + String colTypeLowerCase = colType.toLowerCase();
    + if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
            return JavaDataModel.get().primitive1();
    - } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
    - || colType.equalsIgnoreCase("long")) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
    + || colTypeLowerCase.equals("long")) {
            return JavaDataModel.get().primitive2();
    - } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            return JavaDataModel.get().lengthOfTimestamp();
    - } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            return JavaDataModel.get().lengthOfDate();
    - } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    + } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            return JavaDataModel.get().lengthOfDecimal();
          } else {
            return 0;
    @@ -1014,25 +1018,26 @@ public class StatsUtils {
         * @return raw data size
         */
        public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) {
    - if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
    + String colTypeLowerCase = colType.toLowerCase();
    + if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
            return JavaDataModel.get().lengthForIntArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
            return JavaDataModel.get().lengthForDoubleArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
    - || colType.equalsIgnoreCase("long")) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
    + || colTypeLowerCase.equals("long")) {
            return JavaDataModel.get().lengthForLongArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
            return JavaDataModel.get().lengthForByteArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
            return JavaDataModel.get().lengthForBooleanArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            return JavaDataModel.get().lengthForTimestampArrayOfSize(length);
    - } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            return JavaDataModel.get().lengthForDateArrayOfSize(length);
    - } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    + } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            return JavaDataModel.get().lengthForDecimalArrayOfSize(length);
          } else {
            return 0;
    @@ -1287,8 +1292,9 @@ public class StatsUtils {
            oi = enfd.getWritableObjectInspector();
          }

    - if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
    + colType = colType.toLowerCase();
    + if (colType.equals(serdeConstants.STRING_TYPE_NAME)
    + || colType.equals(serdeConstants.BINARY_TYPE_NAME)
    || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
    || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
    @@ -1400,30 +1406,30 @@ public class StatsUtils {

          for (ColStatistics cs : colStats) {
            if (cs != null) {
    - String colType = cs.getColumnType();
    + String colTypeLowerCase = cs.getColumnType().toLowerCase();
              long nonNullCount = numRows - cs.getNumNulls();
              double sizeOf = 0;
    - if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
    - || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
    + if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)
    + || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
                sizeOf = cs.getAvgColLen();
    - } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
    - || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    - || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
    + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
                int acl = (int) Math.round(cs.getAvgColLen());
                sizeOf = JavaDataModel.get().lengthForStringOfLength(acl);
    - } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
                int acl = (int) Math.round(cs.getAvgColLen());
                sizeOf = JavaDataModel.get().lengthForByteArrayOfSize(acl);
    - } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
                sizeOf = JavaDataModel.get().lengthOfTimestamp();
    - } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    + } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
                sizeOf = JavaDataModel.get().lengthOfDecimal();
    - } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
    + } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
                sizeOf = JavaDataModel.get().lengthOfDate();
              } else {
                sizeOf = cs.getAvgColLen();

Related Discussions

Discussion Navigation
view thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Sep 24, '15 at 6:43p
active: Sep 24, '15 at 6:48p
posts: 2
users: 1
website: hive.apache.org

1 user in discussion

Ctang: 2 posts

People

Translate

site design / logo © 2021 Grokbase