Author: rhbutani
Date: Tue Apr 22 19:07:15 2014
New Revision: 1589253

URL: http://svn.apache.org/r1589253
Log:
HIVE-5376 Hive does not honor type for partition columns when altering column type (Hari Sankar via Sergey, Harish Butani)

Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
     hive/trunk/ql/src/test/queries/clientpositive/alter_partition_coltype.q
     hive/trunk/ql/src/test/results/clientpositive/alter_partition_coltype.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1589253&r1=1589252&r2=1589253&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Apr 22 19:07:15 2014
@@ -38,11 +38,13 @@ import java.util.HashMap;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Map;
+import java.util.Properties;
  import java.util.Map.Entry;
  import java.util.Set;
  import java.util.SortedSet;
  import java.util.TreeSet;

+import org.apache.commons.lang.ArrayUtils;
  import org.apache.commons.lang.StringEscapeUtils;
  import org.apache.commons.lang.StringUtils;
  import org.apache.commons.logging.Log;
@@ -114,6 +116,7 @@ import org.apache.hadoop.hive.ql.metadat
  import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter;
  import org.apache.hadoop.hive.ql.parse.AlterTablePartMergeFilesDesc;
  import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
  import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
  import org.apache.hadoop.hive.ql.plan.AlterDatabaseDesc;
  import org.apache.hadoop.hive.ql.plan.AlterIndexDesc;
@@ -180,7 +183,14 @@ import org.apache.hadoop.hive.serde2.Met
  import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
  import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
  import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
  import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  import org.apache.hadoop.hive.shims.HadoopShims;
  import org.apache.hadoop.hive.shims.ShimLoader;
  import org.apache.hadoop.io.IOUtils;
@@ -1255,7 +1265,56 @@ public class DDLTask extends Task<DDLWor
      assert(tbl.isPartitioned());

      List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
-
+
+    //Check if the existing partition values can be type casted to the new column type
+    // with a non null value before trying to alter the partition column type.
+    try {
+      Set<Partition> partitions = db.getAllPartitionsOf(tbl);
+      int colIndex = -1;
+      for(FieldSchema col : tbl.getTTable().getPartitionKeys()) {
+        colIndex++;
+        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
+          break;
+        }
+      }
+
+      if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
+        throw new HiveException("Cannot find partition column " +
+            alterPartitionDesc.getPartKeySpec().getName());
+      }
+
+      TypeInfo expectedType =
+          TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
+      ObjectInspector outputOI =
+          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
+      Converter converter = ObjectInspectorConverters.getConverter(
+          PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
+
+      // For all the existing partitions, check if the value can be type casted to a non-null object
+      for(Partition part : partitions) {
+        if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
+          continue;
+        }
+        try {
+          String value = part.getValues().get(colIndex);
+          Object convertedValue =
+              converter.convert(value);
+          if (convertedValue == null) {
+            throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " +
+                expectedType + " for value : " + value + " resulted in NULL object");
+          }
+        } catch (Exception e) {
+          throw new HiveException("Exception while converting " +
+              TypeInfoFactory.stringTypeInfo + " to " +
+              expectedType + " for value : " + part.getValues().get(colIndex));
+        }
+      }
+    } catch(Exception e) {
+      throw new HiveException(
+          "Exception while checking type conversion of existing partition values to " +
+          alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
+    }
+
      for(FieldSchema col : tbl.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
          newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());

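For readers of this commit, here is a minimal standalone sketch of the validation idea added to DDLTask above: the partition value, which the metastore keeps as a string, is converted to the requested type with an ObjectInspectorConverters converter, and a null (or failed) conversion is treated as an invalid cast, so the ALTER is rejected. The class and method names (PartitionValueCastCheck, convertsCleanly) are illustrative only and not part of the patch; the sketch assumes hive-serde is on the classpath.

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class PartitionValueCastCheck {

      // Returns true when 'value' converts to a non-null object of 'typeString' (e.g. "int").
      // Mirrors the check in the DDLTask change: a null conversion result means the
      // existing partition value cannot be represented in the new column type.
      static boolean convertsCleanly(String value, String typeString) {
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
        ObjectInspector outputOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        try {
          return converter.convert(value) != null;
        } catch (RuntimeException e) {
          return false;
        }
      }

      public static void main(String[] args) {
        System.out.println(convertsCleanly("100", "int"));   // expected: true
        System.out.println(convertsCleanly("100x", "int"));  // expected: false, so the ALTER would be rejected
      }
    }

This is the same reason the test below changes the existing partition value from '100x' to '100' before altering the dt column to int.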
Modified: hive/trunk/ql/src/test/queries/clientpositive/alter_partition_coltype.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/alter_partition_coltype.q?rev=1589253&r1=1589252&r2=1589253&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/alter_partition_coltype.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/alter_partition_coltype.q Tue Apr 22 19:07:15 2014
@@ -2,56 +2,67 @@
  create table alter_coltype(key string, value string) partitioned by (dt string, ts string);

  -- insert and create a partition.
-insert overwrite table alter_coltype partition(dt='100x', ts='6:30pm') select * from src1;
+insert overwrite table alter_coltype partition(dt='100', ts='6.30') select * from src1;

  desc alter_coltype;

  -- select with paritition predicate.
-select count(*) from alter_coltype where dt = '100x';
+select count(*) from alter_coltype where dt = '100';

  -- alter partition key column data type for dt column.
--- alter table alter_coltype partition column (dt int);
+alter table alter_coltype partition column (dt int);

  -- load a new partition using new data type.
--- insert overwrite table alter_coltype partition(dt=10, ts='3.0') select * from src1;
+insert overwrite table alter_coltype partition(dt=100, ts='3.0') select * from src1;

  -- make sure the partition predicate still works.
--- select count(*) from alter_coltype where dt = '100x';
--- explain extended select count(*) from alter_coltype where dt = '100x';
-
--- select count(*) from alter_coltype where dt = '100';
+select count(*) from alter_coltype where dt = '100';
+explain extended select count(*) from alter_coltype where dt = '100';

  -- alter partition key column data type for ts column.
--- alter table alter_coltype partition column (ts double);
+alter table alter_coltype partition column (ts double);

--- alter table alter_coltype partition column (dt string);
+alter table alter_coltype partition column (dt string);

  -- load a new partition using new data type.
--- insert overwrite table alter_coltype partition(dt='100x', ts=3.0) select * from src1;
+insert overwrite table alter_coltype partition(dt='100', ts=3.0) select * from src1;

  -- validate partition key column predicate can still work.
--- select count(*) from alter_coltype where ts = '6:30pm';
--- explain extended select count(*) from alter_coltype where ts = '6:30pm';
+select count(*) from alter_coltype where ts = '6.30';
+explain extended select count(*) from alter_coltype where ts = '6.30';

  -- validate partition key column predicate on two different partition column data type
  -- can still work.
--- select count(*) from alter_coltype where ts = 3.0 and dt=10;
--- explain extended select count(*) from alter_coltype where ts = 3.0 and dt=10;
+select count(*) from alter_coltype where ts = 3.0 and dt=100;
+explain extended select count(*) from alter_coltype where ts = 3.0 and dt=100;

  -- query where multiple partition values (of different datatypes) are being selected
--- select key, value, dt, ts from alter_coltype where dt is not null;
--- explain extended select key, value, dt, ts from alter_coltype where dt is not null;
+select key, value, dt, ts from alter_coltype where dt is not null;
+explain extended select key, value, dt, ts from alter_coltype where dt is not null;

--- select count(*) from alter_coltype where ts = 3.0;
+select count(*) from alter_coltype where ts = 3.0;

  -- make sure the partition predicate still works.
--- select count(*) from alter_coltype where dt = '100x' or dt = '10';
--- explain extended select count(*) from alter_coltype where dt = '100x' or dt = '10';
+select count(*) from alter_coltype where dt = '100';

--- desc alter_coltype;
--- desc alter_coltype partition (dt='100x', ts='6:30pm');
--- desc alter_coltype partition (dt='100x', ts=3.0);
--- desc alter_coltype partition (dt=10, ts=3.0);
+desc alter_coltype;
+desc alter_coltype partition (dt='100', ts='6.30');
+desc alter_coltype partition (dt='100', ts=3.0);

  drop table alter_coltype;

+create table alterdynamic_part_table(intcol string) partitioned by (partcol1 string, partcol2 string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+insert into table alterdynamic_part_table partition(partcol1, partcol2) select '1', '1', '1' from src where key=150 limit 5;
+
+insert into table alterdynamic_part_table partition(partcol1, partcol2) select '1', '2', '1' from src where key=150 limit 5;
+insert into table alterdynamic_part_table partition(partcol1, partcol2) select NULL, '1', '1' from src where key=150 limit 5;
+
+alter table alterdynamic_part_table partition column (partcol1 int);
+
+explain extended select intcol from alterdynamic_part_table where partcol1='1' and partcol2='1';
+
+explain extended select intcol from alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__');
+select intcol from alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__');
\ No newline at end of file

Modified: hive/trunk/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/alter_partition_coltype.q.out?rev=1589253&r1=1589252&r2=1589253&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/alter_partition_coltype.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/alter_partition_coltype.q.out Tue Apr 22 19:07:15 2014 differ
