Repository: hive
Updated Branches:
   refs/heads/master cbebb4d78 -> 96f2dc723


HIVE-13632: Hive failing on insert empty array into parquet table. (Yongzhi Chen, reviewed by Sergio Pena)
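Background (not part of the original commit message): before this patch, the Parquet writers always emitted startField()/endField() for the repeated inner group of a LIST or MAP, even when the collection had no entries, and Parquet rejects a field that is started but never given a value (the write fails with its "empty fields are illegal" check). The fix writes the outer wrapper group but skips the repeated field entirely when the collection is empty, and relaxes the object inspectors so a zero-length backing array counts as an empty collection rather than a missing one. A minimal sketch of the guard pattern, simplified from the patched DataWritableWriter below (class and method names here are illustrative, not the literal Hive class):

import java.util.List;
import org.apache.parquet.io.api.RecordConsumer;

// Hedged sketch of the guard the patch introduces, not the real Hive writer.
class EmptyCollectionGuardSketch {
  // Write a Hive list as a Parquet LIST: always write the wrapper group, but
  // start the repeated inner field only when there is at least one element,
  // so array() round-trips as [] instead of failing the write.
  static void writeList(RecordConsumer consumer, List<?> elements, String repeatedGroupName) {
    consumer.startGroup();
    if (!elements.isEmpty()) {              // the new guard
      consumer.startField(repeatedGroupName, 0);
      // ... each element written as its own nested group, exactly as in the diff ...
      consumer.endField(repeatedGroupName, 0);
    }
    consumer.endGroup();
  }
}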


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/96f2dc72
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/96f2dc72
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/96f2dc72

Branch: refs/heads/master
Commit: 96f2dc723270bb4c38e5ab842371929c2c1c849a
Parents: cbebb4d
Author: Yongzhi Chen <ychena@apache.org>
Authored: Thu Apr 28 14:52:16 2016 -0400
Committer: Yongzhi Chen <ychena@apache.org>
Committed: Thu May 5 09:58:39 2016 -0400

----------------------------------------------------------------------
  .../serde/AbstractParquetMapInspector.java | 4 +-
  .../serde/ParquetHiveArrayInspector.java | 4 +-
  .../ql/io/parquet/write/DataWritableWriter.java | 67 ++++++++-------
  .../ql/io/parquet/TestDataWritableWriter.java | 29 +++++++
  .../serde/TestAbstractParquetMapInspector.java | 4 +-
  .../serde/TestParquetHiveArrayInspector.java | 4 +-
  .../parquet_array_map_emptynullvals.q | 20 +++++
  .../parquet_array_map_emptynullvals.q.out | 87 ++++++++++++++++++++
  8 files changed, 180 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
index 49bf1c5..e80206e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
@@ -60,7 +60,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn

      if (data instanceof ArrayWritable) {
        final Writable[] mapArray = ((ArrayWritable) data).get();
-       if (mapArray == null || mapArray.length == 0) {
+       if (mapArray == null) {
          return null;
        }

@@ -90,7 +90,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn
      if (data instanceof ArrayWritable) {
        final Writable[] mapArray = ((ArrayWritable) data).get();

-       if (mapArray == null || mapArray.length == 0) {
+       if (mapArray == null) {
          return -1;
        } else {
          return mapArray.length;

http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
index 05e92b5..55614a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
@@ -83,7 +83,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector {

      if (data instanceof ArrayWritable) {
        final Writable[] array = ((ArrayWritable) data).get();
-       if (array == null || array.length == 0) {
+       if (array == null) {
          return -1;
        }

@@ -105,7 +105,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector {

      if (data instanceof ArrayWritable) {
        final Writable[] array = ((ArrayWritable) data).get();
-       if (array == null || array.length == 0) {
+       if (array == null) {
          return null;
        }


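Both inspector changes draw the same distinction: only a missing backing array is treated as null; a zero-length one is now a real, empty container. A hedged illustration of the new contract, mirroring the updated unit tests further down (only ArrayWritable from hadoop-common is used; the printed comments state the before/after behavior):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

// Illustrates the distinction the patched inspectors now make.
public class EmptyContainerDemo {
  public static void main(String[] args) {
    ArrayWritable empty = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]);
    Writable[] backing = empty.get();

    // Old inspector logic: (backing == null || backing.length == 0) -> null / -1
    // New inspector logic: only (backing == null) -> null / -1
    System.out.println(backing == null);   // false: the array is present, just empty
    System.out.println(backing.length);    // 0: now reported as size 0, not -1
  }
}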
http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index 69272dc..1e26c19 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -259,21 +259,24 @@ public class DataWritableWriter {
      @Override
      public void write(Object value) {
        recordConsumer.startGroup();
-       recordConsumer.startField(repeatedGroupName, 0);
-
        int listLength = inspector.getListLength(value);
-       for (int i = 0; i < listLength; i++) {
-         Object element = inspector.getListElement(value, i);
-         recordConsumer.startGroup();
-         if (element != null) {
-           recordConsumer.startField(elementName, 0);
-           elementWriter.write(element);
-           recordConsumer.endField(elementName, 0);
+
+       if (listLength > 0) {
+         recordConsumer.startField(repeatedGroupName, 0);
+
+         for (int i = 0; i < listLength; i++) {
+           Object element = inspector.getListElement(value, i);
+           recordConsumer.startGroup();
+           if (element != null) {
+             recordConsumer.startField(elementName, 0);
+             elementWriter.write(element);
+             recordConsumer.endField(elementName, 0);
+           }
+           recordConsumer.endGroup();
          }
-         recordConsumer.endGroup();
-       }

-       recordConsumer.endField(repeatedGroupName, 0);
+         recordConsumer.endField(repeatedGroupName, 0);
+       }
        recordConsumer.endGroup();
      }
    }
@@ -307,30 +310,32 @@ public class DataWritableWriter {
      @Override
      public void write(Object value) {
        recordConsumer.startGroup();
-       recordConsumer.startField(repeatedGroupName, 0);

        Map<?, ?> mapValues = inspector.getMap(value);
-       for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) {
-         recordConsumer.startGroup();
-         if (keyValue != null) {
-           // write key element
-           Object keyElement = keyValue.getKey();
-           recordConsumer.startField(keyName, 0);
-           keyWriter.write(keyElement);
-           recordConsumer.endField(keyName, 0);
-
-           // write value element
-           Object valueElement = keyValue.getValue();
-           if (valueElement != null) {
-             recordConsumer.startField(valueName, 1);
-             valueWriter.write(valueElement);
-             recordConsumer.endField(valueName, 1);
+       if (mapValues != null && mapValues.size() > 0) {
+         recordConsumer.startField(repeatedGroupName, 0);
+         for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) {
+           recordConsumer.startGroup();
+           if (keyValue != null) {
+             // write key element
+             Object keyElement = keyValue.getKey();
+             recordConsumer.startField(keyName, 0);
+             keyWriter.write(keyElement);
+             recordConsumer.endField(keyName, 0);
+
+             // write value element
+             Object valueElement = keyValue.getValue();
+             if (valueElement != null) {
+               recordConsumer.startField(valueName, 1);
+               valueWriter.write(valueElement);
+               recordConsumer.endField(valueName, 1);
+             }
            }
+           recordConsumer.endGroup();
          }
-         recordConsumer.endGroup();
-       }

-       recordConsumer.endField(repeatedGroupName, 0);
+         recordConsumer.endField(repeatedGroupName, 0);
+       }
        recordConsumer.endGroup();
      }
    }
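With these guards in place, a row whose collection columns are empty emits only the wrapper groups. The new testEmptyArrays unit test below pins this down as an exact sequence of RecordConsumer events; sketched here for reference, with nesting shown by indentation (the calls are the test's own verification helpers):

// Expected RecordConsumer event stream for arrayCol = [] after this patch,
// as asserted by testEmptyArrays below (indentation added for readability):
startMessage();
  startField("arrayCol", 0);
    startGroup();   // the LIST wrapper group is still written...
    endGroup();     // ...but no repeated "array" field is started inside it
  endField("arrayCol", 0);
endMessage();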

http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
index 7049139..934ae9f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
@@ -411,6 +411,35 @@ public class TestDataWritableWriter {
    }

    @Test
+   public void testEmptyArrays() throws Exception {
+     String columnNames = "arrayCol";
+     String columnTypes = "array<int>";
+
+     String fileSchema = "message hive_schema {\n"
+         + "  optional group arrayCol (LIST) {\n"
+         + "    repeated group array {\n"
+         + "      optional int32 array_element;\n"
+         + "    }\n"
+         + "  }\n"
+         + "}\n";
+
+     ArrayWritable hiveRecord = createGroup(
+         new ArrayWritable(Writable.class) // Empty array
+     );
+
+     // Write record to Parquet format
+     writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
+
+     // Verify record was written correctly to Parquet
+     startMessage();
+     startField("arrayCol", 0);
+     startGroup();
+     endGroup();
+     endField("arrayCol", 0);
+     endMessage();
+   }
+
+   @Test
    public void testArrayOfArrays() throws Exception {
      String columnNames = "array_of_arrays";
      String columnTypes = "array<array<int>>";

http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
index f5d9cb4..6af8c53 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
@@ -62,8 +62,8 @@ public class TestAbstractParquetMapInspector extends TestCase {
    @Test
    public void testEmptyContainer() {
      final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]);
-     assertEquals("Wrong size", -1, inspector.getMapSize(map));
-     assertNull("Should be null", inspector.getMap(map));
+     assertEquals("Wrong size", 0, inspector.getMapSize(map));
+     assertNotNull("Should not be null", inspector.getMap(map));
    }

    @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
index 0ce654d..9e0c1ff 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
@@ -51,8 +51,8 @@ public class TestParquetHiveArrayInspector extends TestCase {
    @Test
    public void testEmptyContainer() {
      final ArrayWritable list = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]);
-     assertEquals("Wrong size", -1, inspector.getListLength(list));
-     assertNull("Should be null", inspector.getList(list));
+     assertEquals("Wrong size", 0, inspector.getListLength(list));
+     assertNotNull("Should not be null", inspector.getList(list));
      assertNull("Should be null", inspector.getListElement(list, 0));
    }


http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
new file mode 100644
index 0000000..eeae5cf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
@@ -0,0 +1,20 @@
+drop table if exists testSets;
+drop table if exists testSets2;
+create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet;
+
+insert into table testSets select 'abcd', array(), map() from src limit 1;
+
+create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet;
+insert into table testSets2 select * from testSets;
+select * from testSets2;
+drop table testSets;
+drop table testSets2;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/96f2dc72/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
new file mode 100644
index 0000000..4608607
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
@@ -0,0 +1,87 @@
+PREHOOK: query: drop table if exists testSets
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testSets
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists testSets2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testSets2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testSets
+POSTHOOK: query: create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testSets
+PREHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testsets
+POSTHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testsets
+POSTHOOK: Lineage: testsets.arrayvalues EXPRESSION []
+POSTHOOK: Lineage: testsets.key SIMPLE []
+POSTHOOK: Lineage: testsets.mapvalues EXPRESSION []
+PREHOOK: query: create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testSets2
+POSTHOOK: query: create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testSets2
+PREHOOK: query: insert into table testSets2 select * from testSets
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsets
+PREHOOK: Output: default@testsets2
+POSTHOOK: query: insert into table testSets2 select * from testSets
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsets
+POSTHOOK: Output: default@testsets2
+POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array<string>, comment:null), ]
+POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: testsets2.mapvalues SIMPLE [(testsets)testsets.FieldSchema(name:mapvalues, type:map<string,string>, comment:null), ]
+PREHOOK: query: select * from testSets2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsets2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from testSets2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsets2
+#### A masked pattern was here ####
+abcd [] {}
+PREHOOK: query: drop table testSets
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testsets
+PREHOOK: Output: default@testsets
+POSTHOOK: query: drop table testSets
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testsets
+POSTHOOK: Output: default@testsets
+PREHOOK: query: drop table testSets2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testsets2
+PREHOOK: Output: default@testsets2
+POSTHOOK: query: drop table testSets2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testsets2
+POSTHOOK: Output: default@testsets2
