FAQ
Repository: hive
Updated Branches:
   refs/heads/master be410d24f -> 8555d2aec


HIVE-12522: Wrong FS error during Tez merge files when warehouse and scratchdir are on different FS (Jason Dere, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8555d2ae
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8555d2ae
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8555d2ae

Branch: refs/heads/master
Commit: 8555d2aecf358501c5e3fddd63869bb68f2f3a20
Parents: be410d2
Author: Jason Dere <jdere@hortonworks.com>
Authored: Tue Dec 1 00:54:40 2015 -0800
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Tue Dec 1 00:54:40 2015 -0800

----------------------------------------------------------------------
  .../test/resources/testconfiguration.properties | 3 +
  .../hadoop/hive/ql/exec/tez/DagUtils.java | 5 +-
  .../queries/clientpositive/orc_merge_diff_fs.q | 94 ++++
  .../clientpositive/orc_merge_diff_fs.q.out | 462 +++++++++++++++++
  .../spark/orc_merge_diff_fs.q.out | 485 ++++++++++++++++++
  .../clientpositive/tez/orc_merge_diff_fs.q.out | 497 +++++++++++++++++++
  6 files changed, 1544 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0c5140c..935fd28 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -34,6 +34,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
    load_fs2.q,\
    load_hdfs_file_with_space_in_the_name.q,\
    non_native_window_udf.q, \
+ orc_merge_diff_fs.q,\
    optrstat_groupby.q,\
    parallel_orderby.q,\
    quotedid_smb.q,\
@@ -382,6 +383,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
    llapdecider.q,\
    mrr.q,\
    orc_ppd_basic.q,\
+ orc_merge_diff_fs.q,\
    tez_bmj_schema_evolution.q,\
    tez_dml.q,\
    tez_fsstat.q,\
@@ -1270,6 +1272,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
    orc_merge7.q,\
    orc_merge8.q,\
    orc_merge9.q,\
+ orc_merge_diff_fs.q,\
    orc_merge_incompat1.q,\
    orc_merge_incompat2.q,\
    parallel_orderby.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index db4d73d..6e196e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -576,8 +576,9 @@ public class DagUtils {
        // exist before jobClose (before renaming after job completion)
        Path tempOutPath = Utilities.toTempPath(outputPath);
        try {
- if (!fs.exists(tempOutPath)) {
- fs.mkdirs(tempOutPath);
+ FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
+ if (!tmpOutFS.exists(tempOutPath)) {
+ tmpOutFS.mkdirs(tempOutPath);
          }
        } catch (IOException e) {
          throw new RuntimeException(

http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q b/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
new file mode 100644
index 0000000..a8ab7b8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
@@ -0,0 +1,94 @@
+set hive.explain.user=false;
+set hive.merge.orcfile.stripe.level=false;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.optimize.sort.dynamic.partition=false;
+set mapred.min.split.size=1000;
+set mapred.max.split.size=2000;
+set tez.grouping.min-size=1000;
+set tez.grouping.max-size=2000;
+set hive.merge.tezfiles=false;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.merge.sparkfiles=false;
+
+set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir}/orc_merge_diff_fs;
+
+-- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1;
+DROP TABLE orcfile_merge1b;
+DROP TABLE orcfile_merge1c;
+
+CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
+CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
+CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
+
+-- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/;
+
+set hive.merge.tezfiles=true;
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
+-- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/;
+
+set hive.merge.orcfile.stripe.level=true;
+-- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/;
+
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+-- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t;
+
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t;
+
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t;
+
+select count(*) from orcfile_merge1;
+select count(*) from orcfile_merge1b;
+select count(*) from orcfile_merge1c;
+
+DROP TABLE orcfile_merge1;
+DROP TABLE orcfile_merge1b;
+DROP TABLE orcfile_merge1c;

http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
new file mode 100644
index 0000000..3f047da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
@@ -0,0 +1,462 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1c
+PREHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1b@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1c@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: select count(*) from orcfile_merge1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Output: default@orcfile_merge1c

http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
new file mode 100644
index 0000000..86df0a7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
@@ -0,0 +1,485 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1c
+PREHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1b@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1c@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: select count(*) from orcfile_merge1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Output: default@orcfile_merge1c

http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
new file mode 100644
index 0000000..77ea36a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
@@ -0,0 +1,497 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
+ PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcfile_merge1c
+PREHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- merge disabled
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 6 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge slow way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ File Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-6
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ File Merge
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1b@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- auto-merge fast way
+EXPLAIN
+ INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 1
+ part
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ File Merge
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ File Merge
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcfile_merge1c@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
+ SELECT key, value, PMOD(HASH(key), 2) as part
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify
+SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1 WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1b WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+ SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
+ FROM orcfile_merge1c WHERE ds='1'
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+-21975308766
+PREHOOK: query: select count(*) from orcfile_merge1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: select count(*) from orcfile_merge1c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from orcfile_merge1c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
+POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: DROP TABLE orcfile_merge1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1
+PREHOOK: Output: default@orcfile_merge1
+POSTHOOK: query: DROP TABLE orcfile_merge1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1
+POSTHOOK: Output: default@orcfile_merge1
+PREHOOK: query: DROP TABLE orcfile_merge1b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1b
+PREHOOK: Output: default@orcfile_merge1b
+POSTHOOK: query: DROP TABLE orcfile_merge1b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1b
+POSTHOOK: Output: default@orcfile_merge1b
+PREHOOK: query: DROP TABLE orcfile_merge1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orcfile_merge1c
+PREHOOK: Output: default@orcfile_merge1c
+POSTHOOK: query: DROP TABLE orcfile_merge1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orcfile_merge1c
+POSTHOOK: Output: default@orcfile_merge1c

Search Discussions

  • Jdere at Dec 1, 2015 at 8:59 am
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 9c0d09134 -> 39472b754


    HIVE-12522: Wrong FS error during Tez merge files when warehouse and scratchdir are on different FS (Jason Dere, reviewed by Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39472b75
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39472b75
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39472b75

    Branch: refs/heads/branch-1
    Commit: 39472b7540d26e32ce9ed52ade1c0cfe64ffa347
    Parents: 9c0d091
    Author: Jason Dere <jdere@hortonworks.com>
    Authored: Tue Dec 1 00:54:40 2015 -0800
    Committer: Jason Dere <jdere@hortonworks.com>
    Committed: Tue Dec 1 00:56:02 2015 -0800

    ----------------------------------------------------------------------
      .../test/resources/testconfiguration.properties | 3 +
      .../hadoop/hive/ql/exec/tez/DagUtils.java | 5 +-
      .../queries/clientpositive/orc_merge_diff_fs.q | 94 ++++
      .../clientpositive/orc_merge_diff_fs.q.out | 462 +++++++++++++++++
      .../spark/orc_merge_diff_fs.q.out | 485 ++++++++++++++++++
      .../clientpositive/tez/orc_merge_diff_fs.q.out | 497 +++++++++++++++++++
      6 files changed, 1544 insertions(+), 2 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/39472b75/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
    index 6e0c4e0..836a67a 100644
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@ -33,6 +33,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
        load_fs2.q,\
        load_hdfs_file_with_space_in_the_name.q,\
        non_native_window_udf.q, \
    + orc_merge_diff_fs.q,\
        optrstat_groupby.q,\
        parallel_orderby.q,\
        ql_rewrite_gbtoidx.q,\
    @@ -337,6 +338,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
        lvj_mapjoin.q, \
        mrr.q,\
        orc_ppd_basic.q,\
    + orc_merge_diff_fs.q,\
        tez_bmj_schema_evolution.q,\
        tez_dml.q,\
        tez_fsstat.q,\
    @@ -1150,6 +1152,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
        orc_merge7.q,\
        orc_merge8.q,\
        orc_merge9.q,\
    + orc_merge_diff_fs.q,\
        orc_merge_incompat1.q,\
        orc_merge_incompat2.q,\
        parallel_orderby.q,\

    http://git-wip-us.apache.org/repos/asf/hive/blob/39472b75/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    index 7822be9..c3860b3 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    @@ -574,8 +574,9 @@ public class DagUtils {
            // exist before jobClose (before renaming after job completion)
            Path tempOutPath = Utilities.toTempPath(outputPath);
            try {
    - if (!fs.exists(tempOutPath)) {
    - fs.mkdirs(tempOutPath);
    + FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
    + if (!tmpOutFS.exists(tempOutPath)) {
    + tmpOutFS.mkdirs(tempOutPath);
              }
            } catch (IOException e) {
              throw new RuntimeException(

    http://git-wip-us.apache.org/repos/asf/hive/blob/39472b75/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q b/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
    new file mode 100644
    index 0000000..a8ab7b8
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/orc_merge_diff_fs.q
    @@ -0,0 +1,94 @@
    +set hive.explain.user=false;
    +set hive.merge.orcfile.stripe.level=false;
    +set hive.exec.dynamic.partition=true;
    +set hive.exec.dynamic.partition.mode=nonstrict;
    +set hive.optimize.sort.dynamic.partition=false;
    +set mapred.min.split.size=1000;
    +set mapred.max.split.size=2000;
    +set tez.grouping.min-size=1000;
    +set tez.grouping.max-size=2000;
    +set hive.merge.tezfiles=false;
    +set hive.merge.mapfiles=false;
    +set hive.merge.mapredfiles=false;
    +set hive.merge.sparkfiles=false;
    +
    +set hive.metastore.warehouse.dir=pfile://${system:test.tmp.dir}/orc_merge_diff_fs;
    +
    +-- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1;
    +DROP TABLE orcfile_merge1b;
    +DROP TABLE orcfile_merge1c;
    +
    +CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
    +CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
    +CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
    +
    +-- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/;
    +
    +set hive.merge.tezfiles=true;
    +set hive.merge.mapfiles=true;
    +set hive.merge.mapredfiles=true;
    +set hive.merge.sparkfiles=true;
    +-- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/;
    +
    +set hive.merge.orcfile.stripe.level=true;
    +-- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src;
    +
    +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/;
    +
    +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    +-- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t;
    +
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t;
    +
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t;
    +
    +select count(*) from orcfile_merge1;
    +select count(*) from orcfile_merge1b;
    +select count(*) from orcfile_merge1c;
    +
    +DROP TABLE orcfile_merge1;
    +DROP TABLE orcfile_merge1b;
    +DROP TABLE orcfile_merge1c;

    http://git-wip-us.apache.org/repos/asf/hive/blob/39472b75/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
    new file mode 100644
    index 0000000..3f047da
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out
    @@ -0,0 +1,462 @@
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1c
    +PREHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    + Stage-2 depends on stages: Stage-0
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 2 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
    + Stage-4
    + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
    + Stage-2 depends on stages: Stage-0
    + Stage-3
    + Stage-5
    + Stage-6 depends on stages: Stage-5
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-7
    + Conditional Operator
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    + Stage: Stage-3
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-5
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-6
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1b@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
    + Stage-4
    + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
    + Stage-2 depends on stages: Stage-0
    + Stage-3
    + Stage-5
    + Stage-6 depends on stages: Stage-5
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-7
    + Conditional Operator
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    + Stage: Stage-3
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-5
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-6
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1c@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: select count(*) from orcfile_merge1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1b
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1b
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1c
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1c
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Output: default@orcfile_merge1c

    http://git-wip-us.apache.org/repos/asf/hive/blob/39472b75/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
    new file mode 100644
    index 0000000..86df0a7
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
    @@ -0,0 +1,485 @@
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1c
    +PREHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    + Stage-2 depends on stages: Stage-0
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 2 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
    + Stage-4
    + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
    + Stage-2 depends on stages: Stage-0
    + Stage-3
    + Stage-5
    + Stage-6 depends on stages: Stage-5
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-7
    + Conditional Operator
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    + Stage: Stage-3
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Spark Merge File Work
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-5
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Spark Merge File Work
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-6
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1b@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
    + Stage-4
    + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
    + Stage-2 depends on stages: Stage-0
    + Stage-3
    + Stage-5
    + Stage-6 depends on stages: Stage-5
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-7
    + Conditional Operator
    +
    + Stage: Stage-4
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-2
    + Stats-Aggr Operator
    +
    + Stage: Stage-3
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Spark Merge File Work
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-5
    + Spark
    +#### A masked pattern was here ####
    + Vertices:
    + Spark Merge File Work
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-6
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1c@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: select count(*) from orcfile_merge1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1b
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1b
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1c
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1c
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Output: default@orcfile_merge1c

    http://git-wip-us.apache.org/repos/asf/hive/blob/8555d2ae/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
    new file mode 100644
    index 0000000..77ea36a
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/tez/orc_merge_diff_fs.q.out
    @@ -0,0 +1,497 @@
    +PREHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: -- SORT_QUERY_RESULTS
    +
    +DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: CREATE TABLE orcfile_merge1 (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: CREATE TABLE orcfile_merge1b (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: CREATE TABLE orcfile_merge1c (key INT, value STRING)
    + PARTITIONED BY (ds STRING, part STRING) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@orcfile_merge1c
    +PREHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- merge disabled
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-2 depends on stages: Stage-1
    + Stage-0 depends on stages: Stage-2
    + Stage-3 depends on stages: Stage-0
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-2
    + Dependency Collection
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1
    +
    + Stage: Stage-3
    + Stats-Aggr Operator
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 6 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge slow way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
    + Stage-5
    + Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
    + Stage-0 depends on stages: Stage-2
    + Stage-3 depends on stages: Stage-0
    + Stage-4
    + Stage-6
    + Stage-7 depends on stages: Stage-6
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-8
    + Conditional Operator
    +
    + Stage: Stage-5
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-2
    + Dependency Collection
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-3
    + Stats-Aggr Operator
    +
    + Stage: Stage-4
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + File Merge
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-6
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + File Merge
    + Map Operator Tree:
    + TableScan
    + File Output Operator
    + compressed: false
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1b
    +
    + Stage: Stage-7
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1b@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1b@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1b PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- auto-merge fast way
    +EXPLAIN
    + INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
    + Stage-5
    + Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
    + Stage-0 depends on stages: Stage-2
    + Stage-3 depends on stages: Stage-0
    + Stage-4
    + Stage-6
    + Stage-7 depends on stages: Stage-6
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: src
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
    + outputColumnNames: _col0, _col1, _col2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-8
    + Conditional Operator
    +
    + Stage: Stage-5
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    + Stage: Stage-2
    + Dependency Collection
    +
    + Stage: Stage-0
    + Move Operator
    + tables:
    + partition:
    + ds 1
    + part
    + replace: true
    + table:
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
    + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
    + name: default.orcfile_merge1c
    +
    + Stage: Stage-3
    + Stats-Aggr Operator
    +
    + Stage: Stage-4
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + File Merge
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-6
    + Tez
    +#### A masked pattern was here ####
    + Vertices:
    + File Merge
    + Merge File Operator
    + Map Operator Tree:
    + ORC File Merge Operator
    + merge level: stripe
    + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
    +
    + Stage: Stage-7
    + Move Operator
    + files:
    + hdfs directory: true
    +#### A masked pattern was here ####
    +
    +PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@orcfile_merge1c@ds=1
    +POSTHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
    + SELECT key, value, PMOD(HASH(key), 2) as part
    + FROM src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Output: default@orcfile_merge1c@ds=1/part=1
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: orcfile_merge1c PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +Found 1 items
    +#### A masked pattern was here ####
    +PREHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- Verify
    +SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1 WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1b WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
    + SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
    + FROM orcfile_merge1c WHERE ds='1'
    +) t
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +-21975308766
    +PREHOOK: query: select count(*) from orcfile_merge1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1b
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1b
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1b@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: select count(*) from orcfile_merge1c
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +PREHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from orcfile_merge1c
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=0
    +POSTHOOK: Input: default@orcfile_merge1c@ds=1/part=1
    +#### A masked pattern was here ####
    +500
    +PREHOOK: query: DROP TABLE orcfile_merge1
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1
    +PREHOOK: Output: default@orcfile_merge1
    +POSTHOOK: query: DROP TABLE orcfile_merge1
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1
    +POSTHOOK: Output: default@orcfile_merge1
    +PREHOOK: query: DROP TABLE orcfile_merge1b
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1b
    +PREHOOK: Output: default@orcfile_merge1b
    +POSTHOOK: query: DROP TABLE orcfile_merge1b
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1b
    +POSTHOOK: Output: default@orcfile_merge1b
    +PREHOOK: query: DROP TABLE orcfile_merge1c
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@orcfile_merge1c
    +PREHOOK: Output: default@orcfile_merge1c
    +POSTHOOK: query: DROP TABLE orcfile_merge1c
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@orcfile_merge1c
    +POSTHOOK: Output: default@orcfile_merge1c

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedDec 1, '15 at 8:54a
activeDec 1, '15 at 8:59a
posts2
users1
websitehive.apache.org

1 user in discussion

Jdere: 2 posts

People

Translate

site design / logo © 2021 Grokbase