Grokbase Groups Hive commits May 2015
FAQ
Repository: hive
Updated Branches:
   refs/heads/branch-1.2 9253f5a0d -> 7b89fad81


HIVE-10658 - Insert with values clause may expose data that should be encrypted


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b89fad8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b89fad8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b89fad8

Branch: refs/heads/branch-1.2
Commit: 7b89fad8107b678a27d26931d5d93d91e9544a5a
Parents: 9253f5a
Author: Eugene Koifman <ekoifman@hortonworks.com>
Authored: Fri May 22 15:05:06 2015 -0700
Committer: Eugene Koifman <ekoifman@hortonworks.com>
Committed: Fri May 22 15:05:06 2015 -0700

----------------------------------------------------------------------
  .../test/resources/testconfiguration.properties | 3 +-
  .../org/apache/hadoop/hive/ql/parse/QB.java | 19 ++++++
  .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 64 ++++++++++++++++--
  .../apache/hadoop/hive/ql/parse/TestIUD.java | 7 ++
  .../clientpositive/encryption_insert_values.q | 15 +++++
  .../encryption_insert_partition_dynamic.q.out | 6 +-
  .../encryption_insert_partition_static.q.out | 6 +-
  .../encrypted/encryption_insert_values.q.out | 71 ++++++++++++++++++++
  8 files changed, 182 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b9d85f6..9e95d1b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -350,7 +350,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
    encryption_load_data_to_encrypted_tables.q, \
    encryption_unencrypted_nonhdfs_external_tables.q \
    encryption_move_tbl.q \
- encryption_drop_table.q
+ encryption_drop_table.q \
+ encryption_insert_values.q

  beeline.positive.exclude=add_part_exist.q,\
    alter1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index 7f4d0ff..0ddc221 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -19,6 +19,7 @@
  package org.apache.hadoop.hive.ql.parse;

  import java.util.ArrayList;
+import java.util.Collections;
  import java.util.HashMap;
  import java.util.LinkedHashMap;
  import java.util.List;
@@ -27,6 +28,7 @@ import java.util.Set;

  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.ql.metadata.Table;
  import org.apache.hadoop.hive.ql.plan.CreateTableDesc;

@@ -55,6 +57,7 @@ public class QB {
    private boolean isAnalyzeRewrite;
    private CreateTableDesc tblDesc = null; // table descriptor of the final
    private CreateTableDesc directoryDesc = null ;
+ private List<Path> encryptedTargetTablePaths;

    // used by PTFs
    /*
@@ -387,4 +390,20 @@ public class QB {
      return havingClauseSubQueryPredicate;
    }

+ void addEncryptedTargetTablePath(Path p) {
+ if(encryptedTargetTablePaths == null) {
+ encryptedTargetTablePaths = new ArrayList<>();
+ }
+ encryptedTargetTablePaths.add(p);
+ }
+ /**
+ * List of filesystem paths of encrypted target tables of an insert statement.
+ * Used to support Insert ... values(...)
+ */
+ List<Path> getEncryptedTargetTablePaths() {
+ if(encryptedTargetTablePaths == null) {
+ return Collections.emptyList();
+ }
+ return encryptedTargetTablePaths;
+ }
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 675ad7a..bf889fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -206,6 +206,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.ShimLoader;
  import org.apache.hadoop.hive.shims.Utils;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.mapred.InputFormat;
@@ -718,8 +719,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      return this.nameToSplitSample;
    }

- // Generate a temp table out of a value clause
- private ASTNode genValuesTempTable(ASTNode originalFrom) throws SemanticException {
+ /**
+ * Generate a temp table out of a value clause
+ * See also {@link #preProcessForInsert(ASTNode, QB)}
+ */
+ private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
+ Path dataDir = null;
+ if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
+ //currently only Insert into T values(...) is supported thus only 1 values clause
+ //and only 1 target table are possible. If/when support for
+ //select ... from values(...) is added an insert statement may have multiple
+ //encrypted target tables.
+ dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
+ }
      // Pick a name for the table
      SessionState ss = SessionState.get();
      String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
@@ -756,7 +768,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      Path tablePath = null;
      FileSystem fs = null;
      try {
- tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ if(dataDir == null) {
+ tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ }
+ else {
+ //if target table of insert is encrypted, make sure temporary table data is stored
+ //similarly encrypted
+ tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
+ }
        fs = tablePath.getFileSystem(conf);
        fs.mkdirs(tablePath);
        Path dataFile = new Path(tablePath, "data_file");
@@ -1200,7 +1219,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
          } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
            // Create a temp table with the passed values in it then rewrite this portion of the
            // tree to be from that table.
- ASTNode newFrom = genValuesTempTable(frm);
+ ASTNode newFrom = genValuesTempTable(frm, qb);
            ast.setChild(0, newFrom);
            processTable(qb, newFrom);
          } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
@@ -10018,6 +10037,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {

      // 4. continue analyzing from the child ASTNode.
      Phase1Ctx ctx_1 = initPhase1Ctx();
+ preProcessForInsert(child, qb);
      if (!doPhase1(child, qb, ctx_1, plannerCtx)) {
        // if phase1Result false return
        return false;
@@ -10033,6 +10053,42 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      return true;
    }

+ /**
+ * This will walk AST of an INSERT statement and assemble a list of target tables
+ * which are in an HDFS encryption zone. This is needed to make sure that
+ * the data from the values clause of Insert ... values(...) is stored securely.
+ * See also {@link #genValuesTempTable(ASTNode, QB)}
+ * @throws SemanticException
+ */
+ private void preProcessForInsert(ASTNode node, QB qb) throws SemanticException {
+ try {
+ if(!(node != null && node.getToken() != null && node.getToken().getType() == HiveParser.TOK_QUERY)) {
+ return;
+ }
+ for (Node child : node.getChildren()) {
+ //each insert of multi insert looks like
+ //(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
+ if (((ASTNode) child).getToken().getType() != HiveParser.TOK_INSERT) {
+ continue;
+ }
+ ASTNode n = (ASTNode) ((ASTNode) child).getFirstChildWithType(HiveParser.TOK_INSERT_INTO);
+ if (n == null) continue;
+ n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TAB);
+ if (n == null) continue;
+ n = (ASTNode) n.getFirstChildWithType(HiveParser.TOK_TABNAME);
+ if (n == null) continue;
+ String[] dbTab = getQualifiedTableName(n);
+ Table t = db.getTable(dbTab[0], dbTab[1]);
+ Path tablePath = t.getPath();
+ if (isPathEncrypted(tablePath)) {
+ qb.addEncryptedTargetTablePath(tablePath);
+ }
+ }
+ }
+ catch(Exception ex) {
+ throw new SemanticException(ex);
+ }
+ }
    Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
      return genPlan(qb);
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
index febf6c5..9d4457c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java
@@ -297,4 +297,11 @@ public class TestIUD {
          "(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))",
        ast.toStringTree());
    }
+ @Test
+ public void testMultiInsert() throws ParseException {
+ ASTNode ast = parse("from S insert into T1 select a, b insert into T2 select c, d");
+ Assert.assertEquals("AST doesn't match", "(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME S))) " +
+ "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)))) " +
+ "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL c)) (TOK_SELEXPR (TOK_TABLE_OR_COL d)))))", ast.toStringTree());
+ }
  }

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/queries/clientpositive/encryption_insert_values.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_insert_values.q b/ql/src/test/queries/clientpositive/encryption_insert_values.q
new file mode 100644
index 0000000..2dd3e9a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_insert_values.q
@@ -0,0 +1,15 @@
+-- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE;
+CREATE TABLE encrypted_table (key INT, value STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table';
+CRYPTO CREATE_KEY --keyName key_128 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_128 --path ${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table;
+
+INSERT INTO encrypted_table values(1,'foo'),(2,'bar');
+
+select * from encrypted_table;
+
+-- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1;
+
+CRYPTO DELETE_KEY --keyName key_128;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
index cb6dc5c..31d9a6e 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
@@ -93,9 +93,9 @@ STAGE PLANS:
                  value expressions: _col0 (type: string), _col1 (type: string)
                  auto parallelism: false
        Path -> Alias:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
        Path -> Partition:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
            Partition
              base file name: Values__Tmp__Table__1
              input format: org.apache.hadoop.mapred.TextInputFormat
@@ -106,6 +106,7 @@ STAGE PLANS:
                columns.comments
                columns.types string:string
  #### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
                name default.values__tmp__table__1
                serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
                serialization.format 1
@@ -120,6 +121,7 @@ STAGE PLANS:
                  columns.comments
                  columns.types string:string
  #### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
                  name default.values__tmp__table__1
                  serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
                  serialization.format 1

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
index 8966608..c6e5ee1 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
@@ -96,9 +96,9 @@ STAGE PLANS:
                  value expressions: _col0 (type: string), _col1 (type: string)
                  auto parallelism: false
        Path -> Alias:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
        Path -> Partition:
-#### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
            Partition
              base file name: Values__Tmp__Table__1
              input format: org.apache.hadoop.mapred.TextInputFormat
@@ -109,6 +109,7 @@ STAGE PLANS:
                columns.comments
                columns.types string:string
  #### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
                name default.values__tmp__table__1
                serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
                serialization.format 1
@@ -123,6 +124,7 @@ STAGE PLANS:
                  columns.comments
                  columns.types string:string
  #### A masked pattern was here ####
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
                  name default.values__tmp__table__1
                  serialization.ddl struct values__tmp__table__1 { string tmp_values_col1, string tmp_values_col2}
                  serialization.format 1

http://git-wip-us.apache.org/repos/asf/hive/blob/7b89fad8/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
new file mode 100644
index 0000000..888a612
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out
@@ -0,0 +1,71 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS;
+
+DROP TABLE IF EXISTS encrypted_table PURGE
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encrypted_table
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encrypted_table
+Encryption key created: 'key_128'
+Encryption zone created: '/build/ql/test/data/warehouse/default/encrypted_table' using key: 'key_128'
+PREHOOK: query: INSERT INTO encrypted_table values(1,'foo'),(2,'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encrypted_table
+POSTHOOK: query: INSERT INTO encrypted_table values(1,'foo'),(2,'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encrypted_table
+POSTHOOK: Lineage: encrypted_table.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select * from encrypted_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+POSTHOOK: query: select * from encrypted_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+1 foo
+2 bar
+PREHOOK: query: -- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@values__tmp__table__1
+POSTHOOK: query: -- this checks that we've actually created temp table data under encrypted_table folder
+describe formatted values__tmp__table__1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@values__tmp__table__1
+# col_name data_type comment
+
+tmp_values_col1 string
+tmp_values_col2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging
+Table Type: MANAGED_TABLE
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: May 22, '15 at 10:05p
active: May 22, '15 at 10:05p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ekoifman: 1 post

People

Translate

site design / logo © 2021 Grokbase