FAQ
Repository: hive
Updated Branches:
   refs/heads/branch-1.0 d687bfb81 -> 8026f3914


backport HIVE-9720: Metastore does not properly migrate column stats when renaming a table across databases (Chaoyu via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8026f391
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8026f391
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8026f391

Branch: refs/heads/branch-1.0
Commit: 8026f3914013825e224e62b6c540b3745459cb9e
Parents: d687bfb
Author: Pengcheng Xiong <pxiong@apache.org>
Authored: Mon Aug 31 14:26:35 2015 -0700
Committer: Pengcheng Xiong <pxiong@apache.org>
Committed: Mon Aug 31 14:26:35 2015 -0700

----------------------------------------------------------------------
  .../hadoop/hive/metastore/HiveAlterHandler.java | 161 +++++++++++++++++++
  .../hadoop/hive/metastore/MetaStoreUtils.java | 23 +++
  .../alter_table_invalidate_column_stats_2.q | 6 +
  .../alter_table_invalidate_column_stats_2.q.out | 51 ++++++
  4 files changed, 241 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index fc6215a..09c1c56 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -33,8 +33,11 @@ import org.apache.hadoop.hive.common.FileUtils;
  import org.apache.hadoop.hive.common.ObjectPair;
  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
  import org.apache.hadoop.hive.metastore.api.Database;
  import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidInputException;
  import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
  import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
  import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -42,6 +45,9 @@ import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
  import org.apache.hadoop.hive.metastore.api.Partition;
  import org.apache.hadoop.hive.metastore.api.Table;
  import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hive.common.util.HiveStringUtils;
+
+import com.google.common.collect.Lists;

  /**
   * Hive specific implementation of alter
@@ -191,6 +197,13 @@ public class HiveAlterHandler implements AlterHandler {
              Path newPartLocPath = new Path(oldUri.getScheme(), oldUri.getAuthority(), newPath);
              altps.add(ObjectPair.create(part, part.getSd().getLocation()));
              part.getSd().setLocation(newPartLocPath.toString());
+ String oldPartName = Warehouse.makePartName(oldt.getPartitionKeys(), part.getValues());
+ try {
+ //existing partition column stats is no longer valid, remove them
+ msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, part.getValues(), null);
+ } catch (InvalidInputException iie) {
+ throw new InvalidOperationException("Unable to update partition stats in table rename." + iie);
+ }
              msdb.alterPartition(dbname, name, part.getValues(), part);
            }
          }
@@ -201,6 +214,7 @@ public class HiveAlterHandler implements AlterHandler {
            // alterPartition()
            MetaStoreUtils.updateUnpartitionedTableStatsFast(db, newt, wh, false, true);
        }
+ updateTableColumnStatsForAlterTable(msdb, oldt, newt);
        // now finally call alter table
        msdb.alterTable(dbname, name, newt);
        // commit the changes
@@ -293,6 +307,7 @@ public class HiveAlterHandler implements AlterHandler {
          if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, tbl)) {
            MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true);
          }
+ updatePartColumnStats(msdb, dbname, name, new_part.getValues(), new_part);
          msdb.alterPartition(dbname, name, new_part.getValues(), new_part);
        } catch (InvalidObjectException e) {
          throw new InvalidOperationException("alter is not possible");
@@ -331,6 +346,15 @@ public class HiveAlterHandler implements AlterHandler {
        // if the external partition is renamed, the file should not change
        if (tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
          new_part.getSd().setLocation(oldPart.getSd().getLocation());
+ String oldPartName = Warehouse.makePartName(tbl.getPartitionKeys(), oldPart.getValues());
+ try {
+ //existing partition column stats is no longer valid, remove
+ msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, oldPart.getValues(), null);
+ } catch (NoSuchObjectException nsoe) {
+ //ignore
+ } catch (InvalidInputException iie) {
+ throw new InvalidOperationException("Unable to update partition stats in table rename." + iie);
+ }
          msdb.alterPartition(dbname, name, part_vals, new_part);
        } else {
          try {
@@ -377,6 +401,15 @@ public class HiveAlterHandler implements AlterHandler {
            if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, tbl)) {
              MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true);
            }
+ String oldPartName = Warehouse.makePartName(tbl.getPartitionKeys(), oldPart.getValues());
+ try {
+ //existing partition column stats is no longer valid, remove
+ msdb.deletePartitionColumnStatistics(dbname, name, oldPartName, oldPart.getValues(), null);
+ } catch (NoSuchObjectException nsoe) {
+ //ignore
+ } catch (InvalidInputException iie) {
+ throw new InvalidOperationException("Unable to update partition stats in table rename." + iie);
+ }
            msdb.alterPartition(dbname, name, part_vals, new_part);
          }
        }
@@ -443,6 +476,7 @@ public class HiveAlterHandler implements AlterHandler {
          if (MetaStoreUtils.requireCalStats(hiveConf, oldTmpPart, tmpPart, tbl)) {
            MetaStoreUtils.updatePartitionStatsFast(tmpPart, wh, false, true);
          }
+ updatePartColumnStats(msdb, dbname, name, oldTmpPart.getValues(), tmpPart);
        }
        msdb.alterPartitions(dbname, name, partValsList, new_parts);
      } catch (InvalidObjectException e) {
@@ -490,4 +524,131 @@ public class HiveAlterHandler implements AlterHandler {
      return new Path(currentUri.getScheme(), currentUri.getAuthority(),
          defaultNewPath.toUri().getPath());
    }
+
+ private void updatePartColumnStatsForAlterColumns(RawStore msdb, Partition oldPartition,
+ String oldPartName, List<String> partVals, List<FieldSchema> oldCols, Partition newPart)
+ throws MetaException, InvalidObjectException {
+ String dbName = oldPartition.getDbName();
+ String tableName = oldPartition.getTableName();
+ try {
+ List<String> oldPartNames = Lists.newArrayList(oldPartName);
+ List<String> oldColNames = new ArrayList<String>(oldCols.size());
+ for (FieldSchema oldCol : oldCols) {
+ oldColNames.add(oldCol.getName());
+ }
+ List<FieldSchema> newCols = newPart.getSd().getCols();
+ List<ColumnStatistics> partsColStats = msdb.getPartitionColumnStatistics(dbName, tableName,
+ oldPartNames, oldColNames);
+ assert (partsColStats.size() <= 1);
+ for (ColumnStatistics partColStats : partsColStats) { //actually only at most one loop
+ List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ boolean found =false;
+ for (FieldSchema newCol : newCols) {
+ if (statsObj.getColName().equals(newCol.getName())
+ && statsObj.getColType().equals(newCol.getType())) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals,
+ statsObj.getColName());
+ }
+ }
+ }
+ } catch (NoSuchObjectException nsoe) {
+ LOG.debug("Could not find db entry." + nsoe);
+ //ignore
+ } catch (InvalidInputException iie) {
+ throw new InvalidObjectException
+ ("Invalid input to update partition column stats in alter table change columns" + iie);
+ }
+ }
+
+ private void updatePartColumnStats(RawStore msdb, String dbName, String tableName,
+ List<String> partVals, Partition newPart) throws MetaException, InvalidObjectException {
+ dbName = HiveStringUtils.normalizeIdentifier(dbName);
+ tableName = HiveStringUtils.normalizeIdentifier(tableName);
+ String newDbName = HiveStringUtils.normalizeIdentifier(newPart.getDbName());
+ String newTableName = HiveStringUtils.normalizeIdentifier(newPart.getTableName());
+
+ Table oldTable = msdb.getTable(dbName, tableName);
+ if (oldTable == null) {
+ return;
+ }
+
+ try {
+ String oldPartName = Warehouse.makePartName(oldTable.getPartitionKeys(), partVals);
+ String newPartName = Warehouse.makePartName(oldTable.getPartitionKeys(), newPart.getValues());
+ if (!dbName.equals(newDbName) || !tableName.equals(newTableName)
+ || !oldPartName.equals(newPartName)) {
+ msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals, null);
+ } else {
+ Partition oldPartition = msdb.getPartition(dbName, tableName, partVals);
+ if (oldPartition == null) {
+ return;
+ }
+ if (oldPartition.getSd() != null && newPart.getSd() != null) {
+ List<FieldSchema> oldCols = oldPartition.getSd().getCols();
+ if (!MetaStoreUtils.areSameColumns(oldCols, newPart.getSd().getCols())) {
+ updatePartColumnStatsForAlterColumns(msdb, oldPartition, oldPartName, partVals, oldCols, newPart);
+ }
+ }
+ }
+ } catch (NoSuchObjectException nsoe) {
+ LOG.debug("Could not find db entry." + nsoe);
+ //ignore
+ } catch (InvalidInputException iie) {
+ throw new InvalidObjectException("Invalid input to update partition column stats." + iie);
+ }
+ }
+
+ private void updateTableColumnStatsForAlterTable(RawStore msdb, Table oldTable, Table newTable)
+ throws MetaException, InvalidObjectException {
+ String dbName = oldTable.getDbName();
+ String tableName = oldTable.getTableName();
+ String newDbName = HiveStringUtils.normalizeIdentifier(newTable.getDbName());
+ String newTableName = HiveStringUtils.normalizeIdentifier(newTable.getTableName());
+
+ try {
+ if (!dbName.equals(newDbName) || !tableName.equals(newTableName)) {
+ msdb.deleteTableColumnStatistics(dbName, tableName, null);
+ } else {
+ List<FieldSchema> oldCols = oldTable.getSd().getCols();
+ List<FieldSchema> newCols = newTable.getSd().getCols();
+ if (!MetaStoreUtils.areSameColumns(oldCols, newCols)) {
+ List<String> oldColNames = new ArrayList<String>(oldCols.size());
+ for (FieldSchema oldCol : oldCols) {
+ oldColNames.add(oldCol.getName());
+ }
+
+ ColumnStatistics cs = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
+ if (cs == null) {
+ return;
+ }
+
+ List<ColumnStatisticsObj> statsObjs = cs.getStatsObj();
+ for (ColumnStatisticsObj statsObj : statsObjs) {
+ boolean found = false;
+ for (FieldSchema newCol : newCols) {
+ if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
+ && statsObj.getColType().equals(newCol.getType())) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
+ }
+ }
+ }
+ }
+ } catch (NoSuchObjectException nsoe) {
+ LOG.debug("Could not find db entry." + nsoe);
+ } catch (InvalidInputException e) {
+ //should not happen since the input were verified before passed in
+ throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
+ }
+ }
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 3cf9f17..35e39b3 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -552,6 +552,29 @@ public class MetaStoreUtils {
      }
    }

+ static boolean isCascadeNeededInAlterTable(Table oldTable, Table newTable) {
+ //currently cascade only supports add/replace columns and
+ //changing column type/position/name/comments
+ List<FieldSchema> oldCols = oldTable.getSd().getCols();
+ List<FieldSchema> newCols = newTable.getSd().getCols();
+ return !areSameColumns(oldCols, newCols);
+ }
+
+ static boolean areSameColumns(List<FieldSchema> oldCols, List<FieldSchema> newCols) {
+ if (oldCols.size() != newCols.size()) {
+ return false;
+ } else {
+ for (int i = 0; i < oldCols.size(); i++) {
+ FieldSchema oldCol = oldCols.get(i);
+ FieldSchema newCol = newCols.get(i);
+ if(!oldCol.equals(newCol)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
    /**
     * @return true if oldType and newType are compatible.
     * Two types are compatible if we have internal functions to cast one to another.

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q b/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
new file mode 100644
index 0000000..5dd82c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alter_table_invalidate_column_stats_2.q
@@ -0,0 +1,6 @@
+create table testpart1(id int) partitioned by (dept string);
+alter table testpart1 add partition(dept='a');
+insert into table testpart1 partition(dept='a') values (1);
+analyze table testpart1 partition(dept='a') compute statistics for columns;
+alter table testpart1 rename to testpart1_rename;
+drop table testpart1_rename;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8026f391/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
new file mode 100644
index 0000000..8ff9e81
--- /dev/null
+++ b/ql/src/test/results/clientpositive/alter_table_invalidate_column_stats_2.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: create table testpart1(id int) partitioned by (dept string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: create table testpart1(id int) partitioned by (dept string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testpart1
+PREHOOK: query: alter table testpart1 add partition(dept='a')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: alter table testpart1 add partition(dept='a')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@testpart1
+POSTHOOK: Output: default@testpart1@dept=a
+PREHOOK: query: insert into table testpart1 partition(dept='a') values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testpart1@dept=a
+POSTHOOK: query: insert into table testpart1 partition(dept='a') values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testpart1@dept=a
+POSTHOOK: Lineage: testpart1 PARTITION(dept=a).id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: analyze table testpart1 partition(dept='a') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testpart1
+PREHOOK: Input: default@testpart1@dept=a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testpart1 partition(dept='a') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testpart1
+POSTHOOK: Input: default@testpart1@dept=a
+#### A masked pattern was here ####
+PREHOOK: query: alter table testpart1 rename to testpart1_rename
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: default@testpart1
+PREHOOK: Output: default@testpart1
+POSTHOOK: query: alter table testpart1 rename to testpart1_rename
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: default@testpart1
+POSTHOOK: Output: default@testpart1
+POSTHOOK: Output: default@testpart1_rename
+PREHOOK: query: drop table testpart1_rename
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testpart1_rename
+PREHOOK: Output: default@testpart1_rename
+POSTHOOK: query: drop table testpart1_rename
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testpart1_rename
+POSTHOOK: Output: default@testpart1_rename

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Aug 31, '15 at 9:53p
active: Aug 31, '15 at 9:53p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Pxiong: 1 post

People

Translate

site design / logo © 2021 Grokbase