FAQ
Author: hashutosh
Date: Wed Apr 8 16:41:37 2015
New Revision: 1672119

URL: http://svn.apache.org/r1672119
Log:
HIVE-10231 : Compute partition column stats fails if partition col type is date (Chaoyu Tang via Ashutosh Chauhan)

Added:
     hive/trunk/ql/src/test/queries/clientpositive/columnstats_part_coltype.q
     hive/trunk/ql/src/test/results/clientpositive/columnstats_part_coltype.q.out
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1672119&r1=1672118&r2=1672119&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Wed Apr 8 16:41:37 2015
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.metadat
  import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
  import org.apache.hadoop.hive.ql.metadata.Table;
  import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde.serdeConstants;

  /**
   * ColumnStatsSemanticAnalyzer.
@@ -186,15 +187,7 @@ public class ColumnStatsSemanticAnalyzer
          } else {
            whereClause.append(" and ");
          }
- whereClause.append(partKey);
- whereClause.append(" = ");
- if (getColTypeOf(partKey).equalsIgnoreCase("string")) {
- whereClause.append("'");
- }
- whereClause.append(value);
- if (getColTypeOf(partKey).equalsIgnoreCase("string")) {
- whereClause.append("'");
- }
+ whereClause.append(partKey).append(" = ").append(genPartValueString(partKey, value));
        }
      }

@@ -211,11 +204,39 @@ public class ColumnStatsSemanticAnalyzer
      return predPresent ? whereClause.append(groupByClause) : groupByClause;
    }

+ private String genPartValueString (String partKey, String partVal) throws SemanticException {
+ String returnVal = partVal;
+ String partColType = getColTypeOf(partKey);
+ if (partColType.equals(serdeConstants.STRING_TYPE_NAME) ||
+ partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) ||
+ partColType.contains(serdeConstants.CHAR_TYPE_NAME)) {
+ returnVal = "'" + partVal + "'";
+ } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) {
+ returnVal = partVal+"Y";
+ } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
+ returnVal = partVal+"S";
+ } else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) {
+ returnVal = partVal;
+ } else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) {
+ returnVal = partVal+"L";
+ } else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
+ returnVal = partVal + "BD";
+ } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) ||
+ partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+ returnVal = partColType + " '" + partVal + "'";
+ } else {
+ //for other usually not used types, just quote the value
+ returnVal = "'" + partVal + "'";
+ }
+
+ return returnVal;
+ }
+
    private String getColTypeOf (String partKey) throws SemanticException{

      for (FieldSchema fs : tbl.getPartitionKeys()) {
        if (partKey.equalsIgnoreCase(fs.getName())) {
- return fs.getType();
+ return fs.getType().toLowerCase();
        }
      }
      throw new SemanticException ("Unknown partition key : " + partKey);

Added: hive/trunk/ql/src/test/queries/clientpositive/columnstats_part_coltype.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/columnstats_part_coltype.q?rev=1672119&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/columnstats_part_coltype.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/columnstats_part_coltype.q Wed Apr 8 16:41:37 2015
@@ -0,0 +1,71 @@
+-- Test type date, int, and string in partition column
+drop table if exists partcolstats;
+
+create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string);
+insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20;
+insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select key, value from src limit 20;
+insert into partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30;
+insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40;
+insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60;
+
+analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns;
+describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA');
+describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA');
+
+describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB');
+describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB');
+
+analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns;
+describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB');
+describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB');
+
+describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA');
+describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA');
+
+analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns;
+describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA');
+describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA');
+
+describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA');
+describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA');
+describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB');
+describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB');
+
+analyze table partcolstats partition (ds, hr, part) compute statistics for columns;
+describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA');
+describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA');
+describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB');
+describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB');
+
+drop table partcolstats;
+
+-- Test type tinyint, smallint, and bigint in partition column
+drop table if exists partcolstatsnum;
+create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint);
+insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30;
+
+analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns;
+describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000);
+
+drop table partcolstatsnum;
+
+-- Test type decimal in partition column
+drop table if exists partcolstatsdec;
+create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4));
+insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30;
+
+analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns;
+describe formatted partcolstatsdec.value partition (decpart='1000.0001');
+
+drop table partcolstatsdec;
+
+-- Test type varchar and char in partition column
+drop table if exists partcolstatschar;
+create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3));
+insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30;
+
+analyze table partcolstatschar partition (varpart='part1', charpart='aaa') compute statistics for columns;
+describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa');
+
+drop table partcolstatschar;
+

Added: hive/trunk/ql/src/test/results/clientpositive/columnstats_part_coltype.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/columnstats_part_coltype.q.out?rev=1672119&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/columnstats_part_coltype.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/columnstats_part_coltype.q.out Wed Apr 8 16:41:37 2015
@@ -0,0 +1,441 @@
+PREHOOK: query: -- Test type date, int, and string in partition column
+drop table if exists partcolstats
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Test type date, int, and string in partition column
+drop table if exists partcolstats
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partcolstats
+POSTHOOK: query: create table partcolstats (key int, value string) partitioned by (ds date, hr int, part string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partcolstats
+PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') select key, value from src limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select key, value from src limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') select key, value from src limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partB).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=2,part=partB).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') select key, value from src limit 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=3,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-02,hr=3,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') select key, value from src limit 40
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partA).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partA).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+POSTHOOK: query: insert into partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') select key, value from src limit 60
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partB).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstats PARTITION(ds=2015-04-03,hr=3,part=partB).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstats
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part='partA') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstats
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 27 484 0 18 from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.8 7 from deserializer
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+key int from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+value string from deserializer
+PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstats
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstats
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=2, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 27 484 0 18 from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=2, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.8 7 from deserializer
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+key int from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+value string from deserializer
+PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstats
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstats
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-02', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 27 495 0 28 from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-02', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.833333333333333 7 from deserializer
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+key int from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+value string from deserializer
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+key int from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type comment
+
+value string from deserializer
+PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstats
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+PREHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+PREHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstats
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partA
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=2/part=partB
+POSTHOOK: Input: default@partcolstats@ds=2015-04-02/hr=3/part=partA
+POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partA
+POSTHOOK: Input: default@partcolstats@ds=2015-04-03/hr=3/part=partB
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 15 495 0 43 from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partA')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 34 6.825 7 from deserializer
+PREHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.key partition (ds=date '2015-04-03', hr=3, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+key int 15 495 0 51 from deserializer
+PREHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstats
+POSTHOOK: query: describe formatted partcolstats.value partition (ds=date '2015-04-03', hr=3, part='partB')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstats
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 53 6.883333333333334 7 from deserializer
+PREHOOK: query: drop table partcolstats
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partcolstats
+PREHOOK: Output: default@partcolstats
+POSTHOOK: query: drop table partcolstats
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partcolstats
+POSTHOOK: Output: default@partcolstats
+PREHOOK: query: -- Test type tinyint, smallint, and bigint in partition column
+drop table if exists partcolstatsnum
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Test type tinyint, smallint, and bigint in partition column
+drop table if exists partcolstatsnum
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partcolstatsnum
+POSTHOOK: query: create table partcolstatsnum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partcolstatsnum
+PREHOOK: query: insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstatsnum@tint=100/sint=1000/bint=1000000
+POSTHOOK: query: insert into partcolstatsnum partition (tint=100, sint=1000, bint=1000000) select key, value from src limit 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstatsnum@tint=100/sint=1000/bint=1000000
+POSTHOOK: Lineage: partcolstatsnum PARTITION(tint=100,sint=1000,bint=1000000).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstatsnum PARTITION(tint=100,sint=1000,bint=1000000).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstatsnum
+PREHOOK: Input: default@partcolstatsnum@tint=100/sint=1000/bint=1000000
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstatsnum partition (tint=100, sint=1000, bint=1000000) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstatsnum
+POSTHOOK: Input: default@partcolstatsnum@tint=100/sint=1000/bint=1000000
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstatsnum
+POSTHOOK: query: describe formatted partcolstatsnum.value partition (tint=100, sint=1000, bint=1000000)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstatsnum
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.833333333333333 7 from deserializer
+PREHOOK: query: drop table partcolstatsnum
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partcolstatsnum
+PREHOOK: Output: default@partcolstatsnum
+POSTHOOK: query: drop table partcolstatsnum
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partcolstatsnum
+POSTHOOK: Output: default@partcolstatsnum
+PREHOOK: query: -- Test type decimal in partition column
+drop table if exists partcolstatsdec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Test type decimal in partition column
+drop table if exists partcolstatsdec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partcolstatsdec
+POSTHOOK: query: create table partcolstatsdec (key int, value string) partitioned by (decpart decimal(8,4))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partcolstatsdec
+PREHOOK: query: insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstatsdec@decpart=1000.0001
+POSTHOOK: query: insert into partcolstatsdec partition (decpart='1000.0001') select key, value from src limit 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstatsdec@decpart=1000.0001
+POSTHOOK: Lineage: partcolstatsdec PARTITION(decpart=1000.0001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstatsdec PARTITION(decpart=1000.0001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstatsdec
+PREHOOK: Input: default@partcolstatsdec@decpart=1000.0001
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstatsdec partition (decpart='1000.0001') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstatsdec
+POSTHOOK: Input: default@partcolstatsdec@decpart=1000.0001
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstatsdec.value partition (decpart='1000.0001')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstatsdec
+POSTHOOK: query: describe formatted partcolstatsdec.value partition (decpart='1000.0001')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstatsdec
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.833333333333333 7 from deserializer
+PREHOOK: query: drop table partcolstatsdec
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partcolstatsdec
+PREHOOK: Output: default@partcolstatsdec
+POSTHOOK: query: drop table partcolstatsdec
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partcolstatsdec
+POSTHOOK: Output: default@partcolstatsdec
+PREHOOK: query: -- Test type varchar and char in partition column
+drop table if exists partcolstatschar
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Test type varchar and char in partition column
+drop table if exists partcolstatschar
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partcolstatschar
+POSTHOOK: query: create table partcolstatschar (key int, value string) partitioned by (varpart varchar(5), charpart char(3))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partcolstatschar
+PREHOOK: query: insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partcolstatschar@varpart=part1/charpart=aaa
+POSTHOOK: query: insert into partcolstatschar partition (varpart='part1', charpart='aaa') select key, value from src limit 30
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partcolstatschar@varpart=part1/charpart=aaa
+POSTHOOK: Lineage: partcolstatschar PARTITION(varpart=part1,charpart=aaa).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partcolstatschar PARTITION(varpart=part1,charpart=aaa).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table partcolstatschar partition (varpart='part1', charpart='aaa') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partcolstatschar
+PREHOOK: Input: default@partcolstatschar@varpart=part1/charpart=aaa
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table partcolstatschar partition (varpart='part1', charpart='aaa') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partcolstatschar
+POSTHOOK: Input: default@partcolstatschar@varpart=part1/charpart=aaa
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@partcolstatschar
+POSTHOOK: query: describe formatted partcolstatschar.value partition (varpart='part1', charpart='aaa')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@partcolstatschar
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value string 0 18 6.833333333333333 7 from deserializer
+PREHOOK: query: drop table partcolstatschar
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partcolstatschar
+PREHOOK: Output: default@partcolstatschar
+POSTHOOK: query: drop table partcolstatschar
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partcolstatschar
+POSTHOOK: Output: default@partcolstatschar

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedApr 8, '15 at 4:41p
activeApr 8, '15 at 4:41p
posts1
users1
websitehive.apache.org

1 user in discussion

Hashutosh: 1 post

People

Translate

site design / logo © 2021 Grokbase