FAQ
Repository: hive
Updated Branches:
   refs/heads/master 22fc39796 -> 114eabcbe


HIVE-12505: Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/114eabcb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/114eabcb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/114eabcb

Branch: refs/heads/master
Commit: 114eabcbea24491607f4f933e5033a5986b3b2f4
Parents: 22fc397
Author: ctang <ctang.ma@gmail.com>
Authored: Fri Dec 4 10:46:23 2015 -0500
Committer: ctang <ctang.ma@gmail.com>
Committed: Fri Dec 4 10:46:23 2015 -0500

----------------------------------------------------------------------
  .../apache/hadoop/hive/common/FileUtils.java | 66 ++++++++--
  .../test/resources/testconfiguration.properties | 3 +-
  .../apache/hadoop/hive/ql/metadata/Hive.java | 26 +++-
  .../clientpositive/encryption_with_trash.q | 33 +++++
  .../encrypted/encryption_with_trash.q.out | 122 +++++++++++++++++++
  5 files changed, 234 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index f943781..5dd9f40 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -574,7 +574,7 @@ public final class FileUtils {
    }

    /**
- * Deletes all files under a directory, sending them to the trash. Leaves the directory as is.
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
     * @param fs FileSystem to use
     * @param f path of directory
     * @param conf hive configuration
@@ -582,17 +582,34 @@ public final class FileUtils {
     * @throws FileNotFoundException
     * @throws IOException
     */
- public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) throws FileNotFoundException, IOException {
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf)
+ throws FileNotFoundException, IOException {
+ return trashFilesUnderDir(fs, f, conf, true);
+ }
+
+ /**
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
+ * @param fs FileSystem to use
+ * @param f path of directory
+ * @param conf hive configuration
+ * @param forceDelete whether to force delete files if trashing does not succeed
+ * @return true if deletion successful
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf,
+ boolean forceDelete) throws FileNotFoundException, IOException {
      FileStatus[] statuses = fs.listStatus(f, HIDDEN_FILES_PATH_FILTER);
      boolean result = true;
      for (FileStatus status : statuses) {
- result = result & moveToTrash(fs, status.getPath(), conf);
+ result = result & moveToTrash(fs, status.getPath(), conf, forceDelete);
      }
      return result;
    }

    /**
- * Move a particular file or directory to the trash.
+ * Move a particular file or directory to the trash. If for some reason the trashing fails,
+ * it will force-delete the file or directory
     * @param fs FileSystem to use
     * @param f path of file or directory to move to trash.
     * @param conf
@@ -600,18 +617,47 @@ public final class FileUtils {
     * @throws IOException
     */
    public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) throws IOException {
+ return moveToTrash(fs, f, conf, true);
+ }
+
+ /**
+ * Move a particular file or directory to the trash.
+ * @param fs FileSystem to use
+ * @param f path of file or directory to move to trash.
+ * @param conf
+ * @param forceDelete whether force delete the file or directory if trashing fails
+ * @return true if move successful
+ * @throws IOException
+ */
+ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete)
+ throws IOException {
      LOG.info("deleting " + f);
      HadoopShims hadoopShim = ShimLoader.getHadoopShims();

- if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) {
- LOG.info("Moved to trash: " + f);
- return true;
+ boolean result = false;
+ try {
+ result = hadoopShim.moveToAppropriateTrash(fs, f, conf);
+ if (result) {
+ LOG.info("Moved to trash: " + f);
+ return true;
+ }
+ } catch (IOException ioe) {
+ if (forceDelete) {
+ // for whatever failure reason including that trash has lower encryption zone
+ // retry with force delete
+ LOG.warn(ioe.getMessage() + "; Force to delete it.");
+ } else {
+ throw ioe;
+ }
      }

- boolean result = fs.delete(f, true);
- if (!result) {
- LOG.error("Failed to delete " + f);
+ if (forceDelete) {
+ result = fs.delete(f, true);
+ if (!result) {
+ LOG.error("Failed to delete " + f);
+ }
      }
+
      return result;
    }


http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1031655..39ee9d3 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -471,7 +471,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
    encryption_drop_table.q \
    encryption_insert_values.q \
    encryption_drop_view.q \
- encryption_drop_partition.q
+ encryption_drop_partition.q \
+ encryption_with_trash.q

  beeline.positive.exclude=add_part_exist.q,\
    alter1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 488d923..a4ada24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2953,19 +2953,35 @@ private void constructOneLBLocationMap(FileStatus fSta,
        List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);

        if (oldPath != null) {
+ boolean oldPathDeleted = false;
+ boolean isOldPathUnderDestf = false;
          try {
            FileSystem fs2 = oldPath.getFileSystem(conf);
            if (fs2.exists(oldPath)) {
              // Do not delete oldPath if:
              // - destf is subdir of oldPath
              //if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2)))
- if (FileUtils.isSubDir(oldPath, destf, fs2)) {
- FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
+ isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2);
+ if (isOldPathUnderDestf) {
+ // if oldPath is destf or its subdir, it should definitely be deleted, otherwise its
+ // existing content might result in incorrect (extra) data.
+ // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is
+ // not the destf or its subdir?
+ oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
              }
            }
- } catch (Exception e) {
- //swallow the exception
- LOG.warn("Directory " + oldPath.toString() + " cannot be removed: " + e, e);
+ } catch (IOException e) {
+ if (isOldPathUnderDestf) {
+ // if oldPath is a subdir of destf but it could not be cleaned
+ throw new HiveException("Directory " + oldPath.toString()
+ + " could not be cleaned up.", e);
+ } else {
+ //swallow the exception since it won't affect the final result
+ LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e);
+ }
+ }
+ if (isOldPathUnderDestf && !oldPathDeleted) {
+ throw new HiveException("Destination directory " + destf + " has not be cleaned up.");
          }
        }


http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/test/queries/clientpositive/encryption_with_trash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_with_trash.q b/ql/src/test/queries/clientpositive/encryption_with_trash.q
new file mode 100644
index 0000000..8f8789a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_with_trash.q
@@ -0,0 +1,33 @@
+set fs.trash.interval=5
+
+-- SORT_QUERY_RESULTS
+
+-- init
+drop table IF EXISTS encryptedTableSrc PURGE;
+drop table IF EXISTS unencryptedTable PURGE;
+
+create table encryptedTableSrc(key string, value string)
+LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc';
+
+create table encryptedTable(key string, value string) partitioned by (ds string)
+ LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTable';
+CRYPTO CREATE_KEY --keyName key_1 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTable;
+
+-- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502');
+
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+-- clean up
+drop table encryptedTable PURGE;
+drop table unencryptedTable PURGE;
+CRYPTO DELETE_KEY --keyName key_1;
+set fs.trash.interval=0

http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
new file mode 100644
index 0000000..3d1f75f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
@@ -0,0 +1,122 @@
+Warning: Value had a \n character in it.
+PREHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTableSrc
+POSTHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTableSrc
+PREHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTable
+POSTHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTable
+Encryption key created: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTableSrc' using key: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTable' using key: 'key_1'
+PREHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encryptedtablesrc
+POSTHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encryptedtablesrc
+POSTHOOK: Lineage: encryptedtablesrc.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encryptedtablesrc.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+4
+PREHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: -- clean up
+drop table encryptedTable PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Output: default@encryptedtable
+POSTHOOK: query: -- clean up
+drop table encryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Output: default@encryptedtable
+PREHOOK: query: drop table unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+Encryption key deleted: 'key_1'

Search Discussions

Discussion Posts

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 6 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedDec 4, '15 at 3:47p
activeDec 6, '15 at 12:53a
posts6
users1
websitehive.apache.org

1 user in discussion

Ctang: 6 posts

People

Translate

site design / logo © 2021 Grokbase