FAQ
Repository: hive
Updated Branches:
   refs/heads/master 2bb5e63c9 -> c3f4ab44d


HIVE-12608: Parquet Schema Evolution doesn't work when a column is dropped from array<struct<>> (Mohammad Kamrul Islam, reviewed by Sergio Pena)

Change-Id: I3150c2c0493814112766d1627969acc592b6a4df


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c3f4ab44
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c3f4ab44
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c3f4ab44

Branch: refs/heads/master
Commit: c3f4ab44d07b7de4ae67cc92afa4dedf9de17133
Parents: 2bb5e63
Author: Mohammad Kamrul Islam <mislam77@yahoo.com>
Authored: Tue Dec 8 12:00:46 2015 -0600
Committer: Sergio Pena <sergio.pena@cloudera.com>
Committed: Tue Dec 8 12:00:46 2015 -0600

----------------------------------------------------------------------
  .../io/parquet/convert/HiveStructConverter.java | 6 +++--
  .../clientpositive/parquet_type_promotion.q | 5 ++++
  .../clientpositive/parquet_type_promotion.q.out | 27 ++++++++++++++++++--
  3 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c3f4ab44/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
index 1b43dd9..e4907d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
@@ -127,8 +127,10 @@ public class HiveStructConverter extends HiveGroupConverter {
          return hiveFieldTypeInfos.get(i);
        }
      }
- throw new RuntimeException("cannot find field " + field
- + " in " + hiveFieldNames);
+ //This means hive type doesn't refer this field that comes from file schema.
+ //i.e. the field is not required for hive table. It can occur due to schema
+ //evolution where some field is deleted.
+ return null;
    }

    private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) {

http://git-wip-us.apache.org/repos/asf/hive/blob/c3f4ab44/ql/src/test/queries/clientpositive/parquet_type_promotion.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_type_promotion.q b/ql/src/test/queries/clientpositive/parquet_type_promotion.q
index 46a0f56..c50221d 100644
--- a/ql/src/test/queries/clientpositive/parquet_type_promotion.q
+++ b/ql/src/test/queries/clientpositive/parquet_type_promotion.q
@@ -68,6 +68,11 @@ f2:int,f3:int>>) STORED AS PARQUET;
  INSERT INTO TABLE arrays_of_struct_to_map select array(named_struct("c1",1,"c2",2)), array(named_struct("f1",
  77,"f2",88,"f3",99)) FROM parquet_type_promotion LIMIT 1;
  SELECT * FROM arrays_of_struct_to_map;
+-- Testing schema evolution of dropping column from array<struct<>>
+ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int>>, locations2
+array<struct<f2:int>>);
+SELECT * FROM arrays_of_struct_to_map;
+-- Testing schema evolution of adding columns into array<struct<>>
  ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int,c2:int,c3:int>>, locations2
  array<struct<f1:int,f2:int,f3:int>>);
  SELECT * FROM arrays_of_struct_to_map;

http://git-wip-us.apache.org/repos/asf/hive/blob/c3f4ab44/ql/src/test/results/clientpositive/parquet_type_promotion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_type_promotion.q.out b/ql/src/test/results/clientpositive/parquet_type_promotion.q.out
index dd08631..55f9b27 100644
--- a/ql/src/test/results/clientpositive/parquet_type_promotion.q.out
+++ b/ql/src/test/results/clientpositive/parquet_type_promotion.q.out
@@ -213,12 +213,35 @@ POSTHOOK: type: QUERY
  POSTHOOK: Input: default@arrays_of_struct_to_map
  #### A masked pattern was here ####
  [{"c1":1,"c2":2}] [{"f1":77,"f2":88,"f3":99}]
-PREHOOK: query: ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int,c2:int,c3:int>>, locations2
+PREHOOK: query: -- Testing schema evolution of dropping column from array<struct<>>
+ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int>>, locations2
+array<struct<f2:int>>)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@arrays_of_struct_to_map
+PREHOOK: Output: default@arrays_of_struct_to_map
+POSTHOOK: query: -- Testing schema evolution of dropping column from array<struct<>>
+ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int>>, locations2
+array<struct<f2:int>>)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@arrays_of_struct_to_map
+POSTHOOK: Output: default@arrays_of_struct_to_map
+PREHOOK: query: SELECT * FROM arrays_of_struct_to_map
+PREHOOK: type: QUERY
+PREHOOK: Input: default@arrays_of_struct_to_map
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM arrays_of_struct_to_map
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@arrays_of_struct_to_map
+#### A masked pattern was here ####
+[{"c1":1}] [{"f2":77}]
+PREHOOK: query: -- Testing schema evolution of adding columns into array<struct<>>
+ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int,c2:int,c3:int>>, locations2
  array<struct<f1:int,f2:int,f3:int>>)
  PREHOOK: type: ALTERTABLE_REPLACECOLS
  PREHOOK: Input: default@arrays_of_struct_to_map
  PREHOOK: Output: default@arrays_of_struct_to_map
-POSTHOOK: query: ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int,c2:int,c3:int>>, locations2
+POSTHOOK: query: -- Testing schema evolution of adding columns into array<struct<>>
+ALTER TABLE arrays_of_struct_to_map REPLACE COLUMNS (locations1 array<struct<c1:int,c2:int,c3:int>>, locations2
  array<struct<f1:int,f2:int,f3:int>>)
  POSTHOOK: type: ALTERTABLE_REPLACECOLS
  POSTHOOK: Input: default@arrays_of_struct_to_map

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Dec 8, '15 at 8:02p
active: Dec 8, '15 at 8:02p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Spena: 1 post

People

Translate

site design / logo © 2021 Grokbase