Grokbase Groups Hive commits May 2016
FAQ
Repository: hive
Updated Branches:
   refs/heads/master 7fb4b1fed -> 2ed47838d


HIVE-12643 : For self describing InputFormat don't replicate schema information in partitions (Ashutosh Chauhan via Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ed47838
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ed47838
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ed47838

Branch: refs/heads/master
Commit: 2ed47838dc6cfee3fb6f4470427e50a4495b2eba
Parents: 7fb4b1f
Author: Ashutosh Chauhan <hashutosh@apache.org>
Authored: Wed Dec 9 17:26:00 2015 -0800
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Mon May 23 16:51:12 2016 -0700

----------------------------------------------------------------------
  .../hadoop/hive/metastore/MetaStoreUtils.java | 68 ++++++++++++--------
  .../apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
  .../hive/ql/optimizer/GenMapRedUtils.java | 6 --
  .../hive/ql/optimizer/physical/Vectorizer.java | 6 +-
  .../hadoop/hive/ql/plan/PartitionDesc.java | 14 +++-
  .../clientpositive/quotedid_tblproperty.q.out | 4 +-
  .../tez/vector_partition_diff_num_cols.q.out | 2 +
  .../vector_partition_diff_num_cols.q.out | 2 +
  8 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 6bc882a..84b24ab 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -1012,8 +1012,38 @@ public class MetaStoreUtils {
      return schema;
    }

- public static Properties getSchema(
- org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+ public static Properties addCols(Properties schema, List<FieldSchema> cols) {
+
+ StringBuilder colNameBuf = new StringBuilder();
+ StringBuilder colTypeBuf = new StringBuilder();
+ StringBuilder colComment = new StringBuilder();
+
+ boolean first = true;
+ for (FieldSchema col : cols) {
+ if (!first) {
+ colNameBuf.append(",");
+ colTypeBuf.append(":");
+ colComment.append('\0');
+ }
+ colNameBuf.append(col.getName());
+ colTypeBuf.append(col.getType());
+ colComment.append((null != col.getComment()) ? col.getComment() : "");
+ first = false;
+ }
+ schema.setProperty(
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
+ colNameBuf.toString());
+ String colTypes = colTypeBuf.toString();
+ schema.setProperty(
+ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
+ colTypes);
+ schema.setProperty("columns.comments", colComment.toString());
+
+ return schema;
+
+ }
+
+ public static Properties getSchemaWithoutCols(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
        org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
        Map<String, String> parameters, String databaseName, String tableName,
        List<FieldSchema> partitionKeys) {
@@ -1063,30 +1093,7 @@ public class MetaStoreUtils {
                  .getSerdeInfo().getSerializationLib());
        }
      }
- StringBuilder colNameBuf = new StringBuilder();
- StringBuilder colTypeBuf = new StringBuilder();
- StringBuilder colComment = new StringBuilder();
- boolean first = true;
- for (FieldSchema col : tblsd.getCols()) {
- if (!first) {
- colNameBuf.append(",");
- colTypeBuf.append(":");
- colComment.append('\0');
- }
- colNameBuf.append(col.getName());
- colTypeBuf.append(col.getType());
- colComment.append((null != col.getComment()) ? col.getComment() : "");
- first = false;
- }
- String colNames = colNameBuf.toString();
- String colTypes = colTypeBuf.toString();
- schema.setProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS,
- colNames);
- schema.setProperty(
- org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES,
- colTypes);
- schema.setProperty("columns.comments", colComment.toString());
+
      if (sd.getCols() != null) {
        schema.setProperty(
            org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL,
@@ -1130,6 +1137,15 @@ public class MetaStoreUtils {
      return schema;
    }

+ public static Properties getSchema(
+ org.apache.hadoop.hive.metastore.api.StorageDescriptor sd,
+ org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd,
+ Map<String, String> parameters, String databaseName, String tableName,
+ List<FieldSchema> partitionKeys) {
+
+ return addCols(getSchemaWithoutCols(sd, tblsd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols());
+ }
+
    /**
     * Convert FieldSchemas to columnNames.
     */

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 2ab9ed2..8144c3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -732,7 +732,7 @@ public final class Utilities {
    }

    public static PartitionDesc getPartitionDesc(Partition part) throws HiveException {
- return (new PartitionDesc(part));
+ return new PartitionDesc(part);
    }

    public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part,

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 812af9a..7595065 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -527,9 +527,6 @@ public final class GenMapRedUtils {
      Map<String, String> props = tsOp.getConf().getOpProps();
      if (props != null) {
        Properties target = aliasPartnDesc.getProperties();
- if (target == null) {
- aliasPartnDesc.setProperties(target = new Properties());
- }
        target.putAll(props);
      }

@@ -668,9 +665,6 @@ public final class GenMapRedUtils {

        if (props != null) {
          Properties target = tblDesc.getProperties();
- if (target == null) {
- tblDesc.setProperties(target = new Properties());
- }
          target.putAll(props);
        }


http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 51e7a17..c1d6582 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1147,7 +1147,7 @@ public class Vectorizer implements PhysicalPlanResolver {
    class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {

      private final MapWork mWork;
- private VectorTaskColumnInfo vectorTaskColumnInfo;
+ private final VectorTaskColumnInfo vectorTaskColumnInfo;
      private final boolean isTez;

      public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
@@ -1205,9 +1205,9 @@ public class Vectorizer implements PhysicalPlanResolver {

    class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {

- private VectorTaskColumnInfo vectorTaskColumnInfo;
+ private final VectorTaskColumnInfo vectorTaskColumnInfo;

- private boolean isTez;
+ private final boolean isTez;

      private Operator<? extends OperatorDesc> rootVectorOp;


http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 4d627ef..fe09bdf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -29,6 +29,7 @@ import java.util.Properties;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
  import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
  import org.apache.hadoop.hive.ql.exec.Utilities;
  import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -85,10 +86,17 @@ public class PartitionDesc implements Serializable, Cloneable {

    public PartitionDesc(final Partition part) throws HiveException {
      PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
- setProperties(part.getMetadataFromPartitionSchema());
+ if(Utilities.isInputFileFormatSelfDescribing(this)) {
+ // if IF is self describing no need to send column info per partition, since its not used anyway.
+ Table tbl = part.getTable();
+ setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(), part.getTPartition().getSd(),
+ part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()));
+ } else {
+ setProperties(part.getMetadataFromPartitionSchema());
+ }
    }

- /**
+ /**
     * @param part Partition
     * @param tblDesc Table Descriptor
     * @param usePartSchemaProperties Use Partition Schema Properties to set the
@@ -190,7 +198,7 @@ public class PartitionDesc implements Serializable, Cloneable {
      Class<? extends OutputFormat> outputClass = outputFileFormatClass == null ? null :
        HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass);
      if (outputClass != null) {
- this.outputFileFormatClass = (Class<? extends HiveOutputFormat>)
+ this.outputFileFormatClass = (Class<? extends HiveOutputFormat>)
          CLASS_INTERNER.intern(outputClass);
      } else {
        this.outputFileFormatClass = outputClass;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
index ca1dbe6..3204c7d 100644
--- a/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_tblproperty.q.out
@@ -16,5 +16,5 @@ PREHOOK: Input: default@xyz
  POSTHOOK: query: describe xyz
  POSTHOOK: type: DESCTABLE
  POSTHOOK: Input: default@xyz
-valid_colname string
-invalid.colname string
+key string
+value string

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
index f23a359..9b75892 100644
--- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out
@@ -368,6 +368,7 @@ STAGE PLANS:
                          sort order:
                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                          value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
          Reducer 2
              Execution mode: vectorized
              Reduce Operator Tree:
@@ -477,6 +478,7 @@ STAGE PLANS:
                          sort order:
                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                          value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
          Reducer 2
              Execution mode: vectorized
              Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/2ed47838/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
index ef92b89..b224da8 100644
--- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
+++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out
@@ -346,6 +346,7 @@ STAGE PLANS:
                    sort order:
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
        Reduce Operator Tree:
          Group By Operator
            aggregations: sum(VALUE._col0)
@@ -447,6 +448,7 @@ STAGE PLANS:
                    sort order:
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
        Reduce Operator Tree:
          Group By Operator
            aggregations: sum(VALUE._col0)

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: May 23, '16 at 11:51p
active: May 23, '16 at 11:51p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Hashutosh: 1 post

People

Translate

site design / logo © 2021 Grokbase