Repository: hive
Updated Branches:
   refs/heads/llap d755140d6 -> b8acbb6ef


http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out b/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
deleted file mode 100644
index 5d5b850..0000000
--- a/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
+++ /dev/null
@@ -1,1566 +0,0 @@
-PREHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
--- The one in memory will eventually exceed memory limit, but won't spill.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
--- The one in memory will eventually exceed memory limit, but won't spill.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
--- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
--- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Base result for outer join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for outer join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3155128
-PREHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3155128
-PREHOOK: query: -- Partitioned table
-create table parttbl (key string, value char(20)) partitioned by (dt char(10))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@parttbl
-POSTHOOK: query: -- Partitioned table
-create table parttbl (key string, value char(20)) partitioned by (dt char(10))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@parttbl
-PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
- select * from src
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@parttbl@dt=2000-01-01
-POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
- select * from src
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@parttbl@dt=2000-01-01
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
- select * from src1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src1
-PREHOOK: Output: default@parttbl@dt=2000-01-02
-POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
- select * from src1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src1
-POSTHOOK: Output: default@parttbl@dt=2000-01-02
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- No spill, base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- No spill, base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- No spill, 2 partitions created in memory
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- No spill, 2 partitions created in memory
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- Spill case base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Spill case base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: drop table parttbl
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@parttbl
-PREHOOK: Output: default@parttbl
-POSTHOOK: query: drop table parttbl
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Output: default@parttbl
-PREHOOK: query: -- Test vectorization
--- Test case borrowed from vector_decimal_mapjoin.q
-CREATE TABLE decimal_mapjoin STORED AS ORC AS
- SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
- CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
- cint
- FROM alltypesorc
-PREHOOK: type: CREATETABLE_AS_SELECT
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: database:default
-PREHOOK: Output: default@decimal_mapjoin
-POSTHOOK: query: -- Test vectorization
--- Test case borrowed from vector_decimal_mapjoin.q
-CREATE TABLE decimal_mapjoin STORED AS ORC AS
- SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
- CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
- cint
- FROM alltypesorc
-POSTHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@decimal_mapjoin
-PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: l
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 6981 (type: int)
- 1 6981 (type: int)
- outputColumnNames: _col1, _col9
- input vertices:
- 1 Map 2
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: r
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 6981 (type: int)
- sort order: +
- Map-reduce partition columns: 6981 (type: int)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- value expressions: cdecimal2 (type: decimal(23,14))
- Execution mode: vectorized
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 6984454.211097692
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: l
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 6981 (type: int)
- 1 6981 (type: int)
- outputColumnNames: _col1, _col9
- input vertices:
- 1 Map 2
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: r
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 6981 (type: int)
- sort order: +
- Map-reduce partition columns: 6981 (type: int)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- value expressions: cdecimal2 (type: decimal(23,14))
- Execution mode: vectorized
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 6984454.211097692
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-PREHOOK: query: DROP TABLE decimal_mapjoin
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@decimal_mapjoin
-PREHOOK: Output: default@decimal_mapjoin
-POSTHOOK: query: DROP TABLE decimal_mapjoin
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@decimal_mapjoin
-POSTHOOK: Output: default@decimal_mapjoin
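
The deleted golden file above covers the hybridhashjoin.q scenarios: inner and outer joins with and without "HybridGraceHashJoin: true" in the plan, against both plain and partitioned tables. The spill behavior the test comments describe (partitions created in memory versus on disk, spills during the first round) is driven by session settings rather than by the queries, which are identical across runs. As a minimal, illustrative sketch of applying such settings through the Hadoop Configuration API (the property names and values here are assumptions for illustration, not taken from the test file):

import org.apache.hadoop.conf.Configuration;

public class HybridGraceSettingsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Assumed property: toggles the hybrid grace hash table for map joins.
    // With it off, EXPLAIN shows the base plan without the
    // "HybridGraceHashJoin: true" line seen above.
    conf.setBoolean("hive.mapjoin.hybridgrace.hashtable", true);
    // Assumed lever: shrinking the small-table threshold is one way to
    // force in-memory partitions to spill, as the test comments describe.
    conf.setLong("hive.auto.convert.join.noconditionaltask.size", 1300000L);
    System.out.println(conf.get("hive.mapjoin.hybridgrace.hashtable"));
  }
}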


  • Prasanthj at May 7, 2015 at 1:20 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    index 0192fb5..cee9100 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    @@ -67,7 +67,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable {
        private boolean isBucketMapJoin;

        // Hash table memory usage allowed; used in case of non-staged mapjoin.
    - private float hashtableMemoryUsage;
    + private float hashtableMemoryUsage; // This is a percentage value between 0 and 1
        protected boolean genJoinKeys = true;

        private boolean isHybridHashJoin;
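
    The new comment pins hashtableMemoryUsage down as a fraction in [0, 1] rather than an absolute byte count. A minimal sketch of how such a fraction is typically turned into a byte budget (illustrative only, not Hive's actual code; the 0.9f value is a placeholder):

        public class MemoryBudgetSketch {
          public static void main(String[] args) {
            // Fraction of the heap the hash table may use, per the new comment.
            float hashtableMemoryUsage = 0.9f;
            long maxHeapBytes = Runtime.getRuntime().maxMemory();
            // Derive the absolute budget from the fraction.
            long hashTableBudget = (long) (maxHeapBytes * hashtableMemoryUsage);
            System.out.println("hash table budget (bytes): " + hashTableBudget);
          }
        }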

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
    index eb38b19..a45275b 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
    @@ -35,7 +35,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
          random = new Random(47496);

          VectorMapJoinFastLongHashMap map =
    - new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -55,7 +55,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
        public void testPutGetMultiple() throws Exception {
          random = new Random(2990);

    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -77,7 +77,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
        public void testGetNonExistent() throws Exception {
          random = new Random(16916);

    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -101,7 +101,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
          random = new Random(26078);

          // Make sure the map does not expand; should be able to find space.
    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -126,7 +126,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
          random = new Random(22470);

          // Start with capacity 1; make sure we expand on every put.
    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -147,7 +147,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
          random = new Random(40719);

          // Use a large capacity that doesn't require expansion, yet.
    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);

    @@ -172,7 +172,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
          random = new Random(46809);

          // Use a large capacity that doesn't require expansion, yet.
    - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0);
    + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);

          RandomLongStream randomLongKeyStream = new RandomLongStream(random);


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
    index 3c1b29a..944bda6 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
    @@ -35,7 +35,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
          random = new Random(47496);

          VectorMapJoinFastMultiKeyHashMap map =
    - new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -55,7 +55,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
        public void testPutGetMultiple() throws Exception {
          random = new Random(2990);

    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -77,7 +77,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
        public void testGetNonExistent() throws Exception {
          random = new Random(16916);

    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -101,7 +101,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
          random = new Random(26078);

          // Make sure the map does not expand; should be able to find space.
    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -126,7 +126,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
          random = new Random(22470);

          // Start with capacity 1; make sure we expand on every put.
    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
          RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
    @@ -147,7 +147,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
          random = new Random(5231);

          // Use a large capacity that doesn't require expansion, yet.
    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);

    @@ -178,7 +178,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
          random = new Random(46809);

          // Use a large capacity that doesn't require expansion, yet.
    - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0);
    + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);

          RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
    index 096c890..e92504a 100644
    --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
    +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
    @@ -42,6 +42,8 @@ select * from dest2;

      set hive.auto.convert.join.noconditionaltask=true;
      set hive.auto.convert.join.noconditionaltask.size=200;
    +set hive.mapjoin.hybridgrace.minwbsize=100;
    +set hive.mapjoin.hybridgrace.minnumpartitions=2;

      -- An SMB join followed by a multi-insert
      explain
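
    The two new settings bound how small the hybrid grace hash table's partitions may get: minwbsize is a floor on the per-partition write buffer, and minnumpartitions is a floor on the partition count. One plausible reading of how they could interact -- a hypothetical sketch for illustration, not Hive's actual partition-sizing code -- is:

    // Hypothetical sketch of how the two knobs could combine; an assumption,
    // not Hive's HybridHashTableContainer sizing logic.
    public class PartitionSizingSketch {
      static int pickNumPartitions(long tableSize, long wbSize,
                                   int minNumPartitions, long minWbSize) {
        // Never let a write buffer shrink below the configured minimum.
        long effectiveWbSize = Math.max(Math.min(wbSize, tableSize), minWbSize);
        int parts = Integer.highestOneBit(minNumPartitions);
        if (parts < minNumPartitions) parts <<= 1;   // round the minimum up to a power of two
        while ((long) parts * effectiveWbSize < tableSize) {
          parts <<= 1;                               // grow until the partitions cover the table
        }
        return parts;
      }

      public static void main(String[] args) {
        // With the values tez_join_hash.q sets below (minwbsize=350, minnumpartitions=8):
        System.out.println(pickNumPartitions(3000, 350, 8, 350));  // -> 16
      }
    }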

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
    new file mode 100644
    index 0000000..c7d925e
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
    @@ -0,0 +1,258 @@
    +-- Hybrid Grace Hash Join
    +-- Test basic functionalities:
    +-- 1. Various cases when hash partitions spill
    +-- 2. Partitioned table spilling
    +-- 3. Vectorization
    +
    +SELECT 1;
    +
    +set hive.auto.convert.join=true;
    +set hive.auto.convert.join.noconditionaltask.size=1300000;
    +set hive.mapjoin.optimized.hashtable.wbsize=880000;
    +set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +-- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +-- Two partitions are created. One in memory, one on disk on creation.
    +-- The one in memory will eventually exceed memory limit, but won't spill.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +;
    +
    +set hive.auto.convert.join.noconditionaltask.size=3000000;
    +set hive.mapjoin.optimized.hashtable.wbsize=100000;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +-- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +-- 16 partitions are created: 3 in memory, 13 on disk on creation.
    +-- 1 partition is spilled during the first round of processing, leaving 2 in memory and 14 on disk
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +-- Base result for outer join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +-- 32 partitions are created. 3 in memory, 29 on disk on creation.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +;
    +
    +
    +-- Partitioned table
    +create table parttbl (key string, value char(20)) partitioned by (dt char(10));
    +insert overwrite table parttbl partition(dt='2000-01-01')
    + select * from src;
    +insert overwrite table parttbl partition(dt='2000-01-02')
    + select * from src1;
    +
    +set hive.auto.convert.join.noconditionaltask.size=30000000;
    +set hive.mapjoin.optimized.hashtable.wbsize=10000000;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +-- No spill, base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +-- No spill, 2 partitions created in memory
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +
    +set hive.auto.convert.join.noconditionaltask.size=20000;
    +set hive.mapjoin.optimized.hashtable.wbsize=10000;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +-- Spill case base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +-- Spill case, one partition in memory, one spilled on creation
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +;
    +
    +drop table parttbl;
    +
    +
    +-- Test vectorization
    +-- Test case borrowed from vector_decimal_mapjoin.q
    +CREATE TABLE decimal_mapjoin STORED AS ORC AS
    + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
    + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
    + cint
    + FROM alltypesorc;
    +
    +SET hive.auto.convert.join=true;
    +SET hive.auto.convert.join.noconditionaltask=true;
    +SET hive.auto.convert.join.noconditionaltask.size=50000000;
    +set hive.mapjoin.optimized.hashtable.wbsize=10000;
    +SET hive.vectorized.execution.enabled=true;
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981;
    +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981;
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981;
    +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981;
    +
    +DROP TABLE decimal_mapjoin;
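
    The file's pattern throughout is an A/B comparison: each query runs once with hive.mapjoin.hybridgrace.hashtable=false as a baseline and once with it enabled, and the two results must match. The same check can be driven programmatically; a minimal sketch over HiveServer2 JDBC, with a placeholder connection URL:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class HybridGraceAB {
      public static void main(String[] args) throws Exception {
        // Placeholder URL; point it at a real HiveServer2 instance.
        try (Connection conn = DriverManager.getConnection(
                 "jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement()) {
          long[] counts = new long[2];
          for (int i = 0; i < 2; i++) {
            boolean hybrid = (i == 1);
            stmt.execute("set hive.mapjoin.hybridgrace.hashtable=" + hybrid);
            try (ResultSet rs = stmt.executeQuery(
                "select count(*) from alltypesorc c"
                + " inner join alltypesorc cd on cd.cint = c.cint")) {
              rs.next();
              counts[i] = rs.getLong(1);
            }
          }
          // The join result must not depend on the hash table implementation.
          if (counts[0] != counts[1]) throw new AssertionError("results diverge");
        }
      }
    }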

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
    new file mode 100644
    index 0000000..dd425f4
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
    @@ -0,0 +1,152 @@
    +-- Hybrid Grace Hash Join
    +-- Test n-way join
    +SELECT 1;
    +
    +set hive.auto.convert.join=true;
    +set hive.auto.convert.join.noconditionaltask=true;
    +set hive.auto.convert.join.noconditionaltask.size=10000000;
    +set hive.cbo.enable=false;
    +
    +
    +-- 3-way mapjoin (1 big table, 2 small tables)
    +SELECT 1;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key);
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key);
    +
    +
    +-- 4-way mapjoin (1 big table, 3 small tables)
    +SELECT 1;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key);
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key);
    +
    +
    +-- 2 sets of 3-way mapjoin under 2 different tasks
    +SELECT 1;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value);
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value);
    +
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value);
    +
    +
    +-- A chain of 2 sets of 3-way mapjoin under the same task
    +SELECT 1;
    +
    +set hive.mapjoin.hybridgrace.hashtable=false;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
    +
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
    +
    +set hive.mapjoin.hybridgrace.hashtable=true;
    +
    +EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
    +
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridhashjoin.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/hybridhashjoin.q b/ql/src/test/queries/clientpositive/hybridhashjoin.q
    deleted file mode 100644
    index fbd48ea..0000000
    --- a/ql/src/test/queries/clientpositive/hybridhashjoin.q
    +++ /dev/null
    @@ -1,250 +0,0 @@
    -set hive.auto.convert.join=true;
    -set hive.auto.convert.join.noconditionaltask.size=1300000;
    -set hive.mapjoin.optimized.hashtable.wbsize=880000;
    -set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
    -
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    --- Base result for inner join
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint
    - where c.cint < 2000000000) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint
    - where c.cint < 2000000000) t1
    -;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    --- Two partitions are created. One in memory, one on disk on creation.
    --- The one in memory will eventually exceed memory limit, but won't spill.
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint
    - where c.cint < 2000000000) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint
    - where c.cint < 2000000000) t1
    -;
    -
    -set hive.auto.convert.join.noconditionaltask.size=3000000;
    -set hive.mapjoin.optimized.hashtable.wbsize=100000;
    -
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    --- Base result for inner join
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    --- 16 partitions are created: 3 in memory, 13 on disk on creation.
    --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - inner join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -
    -
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    --- Base result for outer join
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - left outer join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - left outer join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    --- 32 partitions are created. 3 in memory, 29 on disk on creation.
    -explain
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - left outer join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -select count(*) from
    -(select c.ctinyint
    - from alltypesorc c
    - left outer join alltypesorc cd
    - on cd.cint = c.cint) t1
    -;
    -
    -
    --- Partitioned table
    -create table parttbl (key string, value char(20)) partitioned by (dt char(10));
    -insert overwrite table parttbl partition(dt='2000-01-01')
    - select * from src;
    -insert overwrite table parttbl partition(dt='2000-01-02')
    - select * from src1;
    -
    -set hive.auto.convert.join.noconditionaltask.size=30000000;
    -set hive.mapjoin.optimized.hashtable.wbsize=10000000;
    -
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    --- No spill, base result
    -explain
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    --- No spill, 2 partitions created in memory
    -explain
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -
    -set hive.auto.convert.join.noconditionaltask.size=20000;
    -set hive.mapjoin.optimized.hashtable.wbsize=10000;
    -
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    --- Spill case base result
    -explain
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    --- Spill case, one partition in memory, one spilled on creation
    -explain
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -select count(*) from
    -(select p1.value
    - from parttbl p1
    - inner join parttbl p2
    - on p1.key = p2.key) t1
    -;
    -
    -drop table parttbl;
    -
    -
    --- Test vectorization
    --- Test case borrowed from vector_decimal_mapjoin.q
    -CREATE TABLE decimal_mapjoin STORED AS ORC AS
    - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
    - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
    - cint
    - FROM alltypesorc;
    -
    -SET hive.auto.convert.join=true;
    -SET hive.auto.convert.join.noconditionaltask=true;
    -SET hive.auto.convert.join.noconditionaltask.size=50000000;
    -set hive.mapjoin.optimized.hashtable.wbsize=10000;
    -SET hive.vectorized.execution.enabled=true;
    -set hive.mapjoin.hybridgrace.hashtable=false;
    -
    -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    - FROM decimal_mapjoin l
    - JOIN decimal_mapjoin r ON l.cint = r.cint
    - WHERE l.cint = 6981;
    -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    - FROM decimal_mapjoin l
    - JOIN decimal_mapjoin r ON l.cint = r.cint
    - WHERE l.cint = 6981;
    -
    -set hive.mapjoin.hybridgrace.hashtable=true;
    -
    -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    - FROM decimal_mapjoin l
    - JOIN decimal_mapjoin r ON l.cint = r.cint
    - WHERE l.cint = 6981;
    -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    - FROM decimal_mapjoin l
    - JOIN decimal_mapjoin r ON l.cint = r.cint
    - WHERE l.cint = 6981;
    -
    -DROP TABLE decimal_mapjoin;

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_join_hash.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/tez_join_hash.q b/ql/src/test/queries/clientpositive/tez_join_hash.q
    index 3571cd5..67d89f8 100644
    --- a/ql/src/test/queries/clientpositive/tez_join_hash.q
    +++ b/ql/src/test/queries/clientpositive/tez_join_hash.q
    @@ -14,6 +14,8 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key;
      set hive.auto.convert.join=true;
      set hive.auto.convert.join.noconditionaltask=true;
      set hive.auto.convert.join.noconditionaltask.size=3000;
    +set hive.mapjoin.hybridgrace.minwbsize=350;
    +set hive.mapjoin.hybridgrace.minnumpartitions=8;

      explain
      select count(*) from (select x.key as key, y.value as value from

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_smb_main.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/tez_smb_main.q b/ql/src/test/queries/clientpositive/tez_smb_main.q
    index 6398762..1802709 100644
    --- a/ql/src/test/queries/clientpositive/tez_smb_main.q
    +++ b/ql/src/test/queries/clientpositive/tez_smb_main.q
    @@ -42,6 +42,8 @@ select count(*)
      from tab a join tab_part b on a.key = b.key;

      set hive.auto.convert.join.noconditionaltask.size=2000;
    +set hive.mapjoin.hybridgrace.minwbsize=500;
    +set hive.mapjoin.hybridgrace.minnumpartitions=4;
      explain
      select count (*)
      from tab a join tab_part b on a.key = b.key;
    @@ -50,6 +52,8 @@ select count(*)
      from tab a join tab_part b on a.key = b.key;

      set hive.auto.convert.join.noconditionaltask.size=1000;
    +set hive.mapjoin.hybridgrace.minwbsize=250;
    +set hive.mapjoin.hybridgrace.minnumpartitions=4;
      explain
      select count (*)
      from tab a join tab_part b on a.key = b.key;
    @@ -58,6 +62,8 @@ select count(*)
      from tab a join tab_part b on a.key = b.key;

      set hive.auto.convert.join.noconditionaltask.size=500;
    +set hive.mapjoin.hybridgrace.minwbsize=125;
    +set hive.mapjoin.hybridgrace.minnumpartitions=4;
      explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
      select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10564 : webhcat should use webhcat-site.xml properties for controller job submission (Thejas Nair, reviewed by Eugene Koifman)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db69525e
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db69525e
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db69525e

    Branch: refs/heads/llap
    Commit: db69525e7265ec28590218b8fa9e0d40606a1c10
    Parents: 8fb8052
    Author: Thejas Nair <thejas@hortonworks.com>
    Authored: Mon May 4 15:16:12 2015 -0700
    Committer: Thejas Nair <thejas@hortonworks.com>
    Committed: Mon May 4 15:16:12 2015 -0700

    ----------------------------------------------------------------------
      .../hcatalog/templeton/tool/TempletonControllerJob.java | 11 +++++++++--
      1 file changed, 9 insertions(+), 2 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/db69525e/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    index 92e45cb..1f8ebf3 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    @@ -56,7 +56,7 @@ import org.apache.thrift.TException;
       * - run a keep alive thread so the job doesn't end.
       * - Optionally, store the stdout, stderr, and exit value of the child
       * in hdfs files.
    - *
    + *
       * A note on security. When jobs are submitted through WebHCat that use HCatalog, it means that
       * metastore access is required. Hive queries, of course, need metastore access. This in turn
       * requires delegation token to be obtained for metastore in a <em>secure cluster</em>. Since we
    @@ -80,6 +80,11 @@ public class TempletonControllerJob extends Configured implements Tool, JobSubmi
          this.appConf = conf;
        }

    + @Override
    + public Configuration getConf() {
    + return appConf;
    + }
    +
        private JobID submittedJobId;

        public String getSubmittedId() {
    @@ -95,7 +100,7 @@ public class TempletonControllerJob extends Configured implements Tool, JobSubmi
         * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
         */
        @Override
    - public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException,
    + public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException,
                TException {
          if(LOG.isDebugEnabled()) {
            LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    @@ -169,9 +174,11 @@ public class TempletonControllerJob extends Configured implements Tool, JobSubmi
          final UserGroupInformation ugi = UgiFactory.getUgi(user);
          UserGroupInformation real = ugi.getRealUser();
          return real.doAs(new PrivilegedExceptionAction<String>() {
    + @Override
            public String run() throws IOException, TException, InterruptedException {
              final HiveMetaStoreClient client = new HiveMetaStoreClient(c);
              return ugi.doAs(new PrivilegedExceptionAction<String>() {
    + @Override
                public String run() throws IOException, TException, InterruptedException {
                  String u = ugi.getUserName();
                  return client.getDelegationToken(u);
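
    The getConf() override is the substance of the fix: TempletonControllerJob extends Configured, so without it, callers asking the Tool for its configuration would get whatever Configured was holding rather than the AppConfig that carries the webhcat-site.xml properties. A hypothetical illustration of the pattern (names invented, not the WebHCat class itself):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    // Hypothetical names; illustrates the Configured/Tool pattern only.
    public class MyControllerTool extends Configured implements Tool {
      private final Configuration appConf;  // e.g. includes an app-specific *-site.xml

      public MyControllerTool(Configuration appConf) {
        this.appConf = appConf;
      }

      @Override
      public Configuration getConf() {
        // Without this override, Configured.getConf() returns whatever conf
        // ToolRunner injected via setConf(), and the app-level properties are lost.
        return appConf;
      }

      @Override
      public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "controller");  // submission sees appConf
        // ... configure the job and submit ...
        return job.waitForCompletion(true) ? 0 : 1;
      }

      public static void main(String[] args) throws Exception {
        Configuration appConf = new Configuration();
        appConf.addResource("webhcat-site.xml");  // assumption: resource on the classpath
        System.exit(ToolRunner.run(appConf, new MyControllerTool(appConf), args));
      }
    }
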
  • Prasanthj at May 7, 2015 at 1:20 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
    new file mode 100644
    index 0000000..4ad143c
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
    @@ -0,0 +1,1417 @@
    +PREHOOK: query: -- Hybrid Grace Hash Join
    +-- Test n-way join
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- Hybrid Grace Hash Join
    +-- Test n-way join
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables)
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables)
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +428
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +428
    +PREHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables)
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables)
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + Inner Join 0 to 3
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + 3 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + 3 Map 5
    + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: w
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map 5
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +5680
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + Inner Join 0 to 3
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + 3 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + 3 Map 5
    + Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: w
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map 5
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN srcpart w ON (x.key = w.key)
    +JOIN src y ON (y.key = x.key)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +5680
    +PREHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
    + Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
    + Reducer 5 <- Union 4 (SIMPLE_EDGE)
    + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 10
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 6
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 6
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Map 7
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 8
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 value (type: string)
    + 1 value (type: string)
    + 2 value (type: string)
    + input vertices:
    + 0 Map 7
    + 2 Map 10
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Group By Operator
    + keys: _col0 (type: bigint)
    + mode: hash
    + outputColumnNames: _col0
    + Reduce Output Operator
    + key expressions: _col0 (type: bigint)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: bigint)
    + Reducer 5
    + Reduce Operator Tree:
    + Group By Operator
    + keys: KEY._col0 (type: bigint)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Reducer 9
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Group By Operator
    + keys: _col0 (type: bigint)
    + mode: hash
    + outputColumnNames: _col0
    + Reduce Output Operator
    + key expressions: _col0 (type: bigint)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: bigint)
    + Union 4
    + Vertex: Union 4
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +428
    +452
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
    + Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
    + Reducer 5 <- Union 4 (SIMPLE_EDGE)
    + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 10
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + input vertices:
    + 0 Map 1
    + 2 Map 6
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 6
    + Map Operator Tree:
    + TableScan
    + alias: y
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
    + Map 7
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
    + Map 8
    + Map Operator Tree:
    + TableScan
    + alias: z
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: value is not null (type: boolean)
    + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 value (type: string)
    + 1 value (type: string)
    + 2 value (type: string)
    + input vertices:
    + 0 Map 7
    + 2 Map 10
    + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Group By Operator
    + keys: _col0 (type: bigint)
    + mode: hash
    + outputColumnNames: _col0
    + Reduce Output Operator
    + key expressions: _col0 (type: bigint)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: bigint)
    + Reducer 5
    + Reduce Operator Tree:
    + Group By Operator
    + keys: KEY._col0 (type: bigint)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Reducer 9
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Group By Operator
    + keys: _col0 (type: bigint)
    + mode: hash
    + outputColumnNames: _col0
    + Reduce Output Operator
    + key expressions: _col0 (type: bigint)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: bigint)
    + Union 4
    + Vertex: Union 4
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.key = z.key)
    +JOIN src y ON (y.key = x.key)
    +UNION
    +SELECT COUNT(*)
    +FROM src1 x JOIN srcpart z ON (x.value = z.value)
    +JOIN src y ON (y.value = x.value)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +428
    +452
    +PREHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
    + value expressions: value (type: string)
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z1
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (key < 'zzzzzzzz') (type: boolean)
    + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + outputColumnNames: _col1
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 _col1 (type: string)
    + 1 value (type: string)
    + 2 value (type: string)
    + input vertices:
    + 1 Map 5
    + 2 Map 6
    + Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: y1
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + Map 5
    + Map Operator Tree:
    + TableScan
    + alias: z2
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Map 6
    + Map Operator Tree:
    + TableScan
    + alias: y2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (value < 'zzzzzzzzzz') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +18256
    +PREHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
    + Reducer 3 <- Map 2 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: x
    + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
    + value expressions: value (type: string)
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: z1
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (key < 'zzzzzzzz') (type: boolean)
    + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + 2 key (type: string)
    + outputColumnNames: _col1
    + input vertices:
    + 0 Map 1
    + 2 Map 4
    + Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + Inner Join 0 to 2
    + keys:
    + 0 _col1 (type: string)
    + 1 value (type: string)
    + 2 value (type: string)
    + input vertices:
    + 1 Map 5
    + 2 Map 6
    + Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 4
    + Map Operator Tree:
    + TableScan
    + alias: y1
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
    + Map 5
    + Map Operator Tree:
    + TableScan
    + alias: z2
    + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
    + Map 6
    + Map Operator Tree:
    + TableScan
    + alias: y2
    + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (value < 'zzzzzzzzzz') (type: boolean)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: value (type: string)
    + sort order: +
    + Map-reduce partition columns: value (type: string)
    + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
    + Reducer 3
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Input: default@src1
    +PREHOOK: Input: default@srcpart
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT COUNT(*)
    +FROM src1 x
    +JOIN srcpart z1 ON (x.key = z1.key)
    +JOIN src y1 ON (x.key = y1.key)
    +JOIN srcpart z2 ON (x.value = z2.value)
    +JOIN src y2 ON (x.value = y2.value)
    +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
    + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Input: default@srcpart
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
    +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
    +#### A masked pattern was here ####
    +18256
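
    The second EXPLAIN of each query above differs from the first only by the
    "HybridGraceHashJoin: true" annotation under each Map Join Operator, i.e. the
    same n-way map joins routed through the hybrid grace hash table once the
    feature is switched on. A minimal sketch of flipping that switch from Java;
    the ConfVars name HIVEUSEHYBRIDGRACEHASHJOIN (backing the
    hive.mapjoin.hybridgrace.hashtable property) is an assumption here, as the
    flag itself does not appear in this diff:

        // Hedged sketch: enabling hybrid grace hash join before compiling a plan.
        // HIVEUSEHYBRIDGRACEHASHJOIN / hive.mapjoin.hybridgrace.hashtable are
        // assumed names for the feature flag; they are not shown in this diff.
        import org.apache.hadoop.hive.conf.HiveConf;

        public class EnableGraceJoin {
          public static void main(String[] args) {
            HiveConf conf = new HiveConf();
            conf.setBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, true);
            // Plans compiled against this conf carry "HybridGraceHashJoin: true"
            // under each Map Join Operator, as in the EXPLAIN output above.
            System.out.println(
                conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
          }
        }
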
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10403 - Add n-way join support for Hybrid Grace Hash Join (Wei Zheng via Vikram Dixit)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c72d073c
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c72d073c
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c72d073c

    Branch: refs/heads/llap
    Commit: c72d073c1fe2a07c80120647bb2170fb7e50d168
    Parents: 6db33a9
    Author: vikram <vikram@hortonworks.com>
    Authored: Fri May 1 10:54:21 2015 -0700
    Committer: vikram <vikram@hortonworks.com>
    Committed: Fri May 1 11:07:06 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
      .../test/resources/testconfiguration.properties | 3 +-
      .../hadoop/hive/ql/exec/HashTableLoader.java | 4 +-
      .../hadoop/hive/ql/exec/MapJoinOperator.java | 169 +-
      .../hadoop/hive/ql/exec/mr/HashTableLoader.java | 2 +-
      .../persistence/BytesBytesMultiHashMap.java | 1 +
      .../exec/persistence/HybridHashTableConf.java | 86 +
      .../persistence/HybridHashTableContainer.java | 213 ++-
      .../ql/exec/persistence/KeyValueContainer.java | 31 +-
      .../ql/exec/persistence/ObjectContainer.java | 31 +-
      .../hive/ql/exec/spark/HashTableLoader.java | 2 +-
      .../hive/ql/exec/tez/HashTableLoader.java | 60 +-
      .../ql/exec/vector/VectorMapJoinOperator.java | 4 +-
      .../mapjoin/VectorMapJoinCommonOperator.java | 4 -
      .../VectorMapJoinGenerateResultOperator.java | 25 +-
      .../fast/VectorMapJoinFastBytesHashMap.java | 4 +-
      .../VectorMapJoinFastBytesHashMultiSet.java | 4 +-
      .../fast/VectorMapJoinFastBytesHashSet.java | 4 +-
      .../fast/VectorMapJoinFastBytesHashTable.java | 4 +-
      .../mapjoin/fast/VectorMapJoinFastHashMap.java | 4 +-
      .../fast/VectorMapJoinFastHashMultiSet.java | 4 +-
      .../mapjoin/fast/VectorMapJoinFastHashSet.java | 4 +-
      .../fast/VectorMapJoinFastHashTable.java | 4 +-
      .../fast/VectorMapJoinFastHashTableLoader.java | 4 +-
      .../fast/VectorMapJoinFastLongHashMap.java | 4 +-
      .../fast/VectorMapJoinFastLongHashMultiSet.java | 4 +-
      .../fast/VectorMapJoinFastLongHashSet.java | 4 +-
      .../fast/VectorMapJoinFastLongHashTable.java | 4 +-
      .../fast/VectorMapJoinFastMultiKeyHashMap.java | 6 +-
      .../VectorMapJoinFastMultiKeyHashMultiSet.java | 4 +-
      .../fast/VectorMapJoinFastMultiKeyHashSet.java | 4 +-
      .../fast/VectorMapJoinFastStringHashMap.java | 4 +-
      .../VectorMapJoinFastStringHashMultiSet.java | 4 +-
      .../fast/VectorMapJoinFastStringHashSet.java | 4 +-
      .../fast/VectorMapJoinFastTableContainer.java | 23 +-
      .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 2 +-
      .../fast/TestVectorMapJoinFastLongHashMap.java | 14 +-
      .../TestVectorMapJoinFastMultiKeyHashMap.java | 14 +-
      .../clientpositive/auto_sortmerge_join_13.q | 2 +
      .../clientpositive/hybridgrace_hashjoin_1.q | 258 +++
      .../clientpositive/hybridgrace_hashjoin_2.q | 152 ++
      .../queries/clientpositive/hybridhashjoin.q | 250 ---
      .../test/queries/clientpositive/tez_join_hash.q | 2 +
      .../test/queries/clientpositive/tez_smb_main.q | 6 +
      .../tez/hybridgrace_hashjoin_1.q.out | 1587 ++++++++++++++++++
      .../tez/hybridgrace_hashjoin_2.q.out | 1417 ++++++++++++++++
      .../clientpositive/tez/hybridhashjoin.q.out | 1566 -----------------
      47 files changed, 3924 insertions(+), 2086 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index 72e4ff2..95e3d04 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -764,6 +764,10 @@ public class HiveConf extends Configuration {
          HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " +
              "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
              "This number should be power of 2."),
    + HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" +
    + " hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
    + HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" +
    + " hybrid grace hash join, the minimum number of partitions to create."),
          HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024,
              "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
              "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
    index b7abf0d..b2a6e58 100644
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@ -301,7 +301,8 @@ minitez.query.files=bucket_map_join_tez1.q,\
        dynamic_partition_pruning_2.q,\
        explainuser_1.q,\
        explainuser_2.q,\
    - hybridhashjoin.q,\
    + hybridgrace_hashjoin_1.q,\
    + hybridgrace_hashjoin_2.q,\
        mapjoin_decimal.q,\
        lvj_mapjoin.q, \
        mrr.q,\

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
    index c3e3078..cbf2d43 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
    @@ -32,6 +32,6 @@ public interface HashTableLoader {
        void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
            MapJoinOperator joinOp);

    - void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes,
    - long memUsage) throws HiveException;
    + void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes)
    + throws HiveException;
      }
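
    The interface drops the memUsage argument because memory sizing now happens
    inside the loader and container themselves (see the Tez HashTableLoader and
    HybridHashTableContainer entries in the diffstat). A stub against the new
    two-argument signature might look as follows; the class name and no-op
    bodies are illustrative only:

        // Illustrative no-op loader against the new load() signature.
        // Only the signatures are meaningful; a real loader fills mapJoinTables.
        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.hive.ql.exec.HashTableLoader;
        import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
        import org.apache.hadoop.hive.ql.exec.MapredContext;
        import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
        import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
        import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
        import org.apache.hadoop.hive.ql.metadata.HiveException;

        public class NoopHashTableLoader implements HashTableLoader {
          @Override
          public void init(ExecMapperContext context, MapredContext mrContext,
              Configuration hconf, MapJoinOperator joinOp) {
            // a real implementation keeps these references for load()
          }

          @Override
          public void load(MapJoinTableContainer[] mapJoinTables,
              MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
            // a real loader deserializes each small table into mapJoinTables[pos];
            // note there is no memUsage parameter anymore: sizing is the loader's job
          }
        }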

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    index f66ab90..f2b800a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    @@ -35,7 +35,6 @@ import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.hive.common.ObjectPair;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
    -import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler;
      import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
      import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
      import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
    @@ -89,9 +88,10 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
        private UnwrapRowContainer[] unwrapContainer;
        private transient Configuration hconf;
        private transient boolean hybridMapJoinLeftover; // whether there's spilled data to be processed
    - protected transient MapJoinBytesTableContainer currentSmallTable; // reloaded hashmap from disk
    - protected transient int tag; // big table alias
    - protected transient int smallTable; // small table alias
    + protected transient MapJoinBytesTableContainer[] spilledMapJoinTables; // used to hold restored
    + // spilled small tables
    + protected HybridHashTableContainer firstSmallTable; // The first small table;
    + // Only this table has spilled big table rows

        public MapJoinOperator() {
        }
    @@ -272,7 +272,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

        protected Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTable(
            ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
    -
          loadCalled = true;

          if (this.hashTblInitedOnce
    @@ -285,9 +284,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
          loader.init(mapContext, mrContext, hconf, this);
    - long memUsage = (long)(MapJoinMemoryExhaustionHandler.getMaxHeapSize()
    - * conf.getHashTableMemoryUsage());
    - loader.load(mapJoinTables, mapJoinTableSerdes, memUsage);
    + loader.load(mapJoinTables, mapJoinTableSerdes);

          hashTblInitedOnce = true;

    @@ -325,18 +322,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

        @Override
        public void process(Object row, int tag) throws HiveException {
    - this.tag = tag;
    -
    - // As we're calling processOp again to process the leftover triplets, we know the "row" is
    - // coming from the on-disk matchfile. We need to recreate hashMapRowGetter against new hashtable
    - if (hybridMapJoinLeftover) {
    - assert hashMapRowGetters != null;
    - if (hashMapRowGetters[smallTable] == null) {
    - MapJoinKey refKey = getRefKey((byte) tag);
    - hashMapRowGetters[smallTable] = currentSmallTable.createGetter(refKey);
    - }
    - }
    -
          try {
            alias = (byte) tag;
            if (hashMapRowGetters == null) {
    @@ -349,13 +334,24 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
              }
            }

    + // As we're calling processOp again to process the leftover "tuples", we know the "row" is
    + // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new hashtables
    + if (hybridMapJoinLeftover) {
    + MapJoinKey refKey = getRefKey(alias);
    + for (byte pos = 0; pos < order.length; pos++) {
    + if (pos != alias && spilledMapJoinTables[pos] != null) {
    + hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
    + }
    + }
    + }
    +
            // compute keys and values as StandardObjects
            ReusableGetAdaptor firstSetKey = null;
            int fieldCount = joinKeys[alias].size();
            boolean joinNeeded = false;
    + boolean bigTableRowSpilled = false;
            for (byte pos = 0; pos < order.length; pos++) {
              if (pos != alias) {
    - smallTable = pos; // record small table alias
                JoinUtil.JoinResult joinResult;
                ReusableGetAdaptor adaptor;
                if (firstSetKey == null) {
    @@ -389,9 +385,14 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
                  storage[pos] = rowContainer.copy();
                  aliasFilterTags[pos] = rowContainer.getAliasFilter();
                }
    - // Spill the big table rows into appropriate partition
    - if (joinResult == JoinUtil.JoinResult.SPILL) {
    + // Spill the big table rows into appropriate partition:
    + // When the JoinResult is SPILL, it means the corresponding small table row may have been
    + // spilled to disk (at least the partition that holds this row is on disk). So we need to
    + // postpone the join processing for this pair by also spilling this big table row.
    + if (joinResult == JoinUtil.JoinResult.SPILL &&
    + !bigTableRowSpilled) { // For n-way join, only spill big table rows once
                  spillBigTableRow(mapJoinTables[pos], row);
    + bigTableRowSpilled = true;
                }
              }
            }
    @@ -431,7 +432,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

        @Override
        public void closeOp(boolean abort) throws HiveException {
    -
          boolean spilled = false;
          for (MapJoinTableContainer container: mapJoinTables) {
            if (container != null) {
    @@ -440,10 +440,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
            }
          }

    + // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
          if (spilled) {
    - for (MapJoinTableContainer tableContainer : mapJoinTables) {
    - if (tableContainer != null) {
    - if (tableContainer instanceof HybridHashTableContainer) {
    + if (hashMapRowGetters == null) {
    + hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
    + }
    + int numPartitions = 0;
    + // Find out number of partitions for each small table (should be same across tables)
    + for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    + if (pos != conf.getPosBigTable()) {
    + firstSmallTable = (HybridHashTableContainer)mapJoinTables[pos];
    + numPartitions = firstSmallTable.getHashPartitions().length;
    + break;
    + }
    + }
    + assert numPartitions != 0 : "Number of partitions must be greater than 0!";
    +
    + if (firstSmallTable.hasSpill()) {
    + spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
    + hybridMapJoinLeftover = true;
    +
    + // Clear all in-memory partitions first
    + for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    + MapJoinTableContainer tableContainer = mapJoinTables[pos];
    + if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
                  HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
                  hybridHtContainer.dumpStats();

    @@ -453,29 +473,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
                    if (!hashPartitions[i].isHashMapOnDisk()) {
                      hybridHtContainer.setTotalInMemRowCount(
                          hybridHtContainer.getTotalInMemRowCount() -
    - hashPartitions[i].getHashMapFromMemory().getNumValues());
    + hashPartitions[i].getHashMapFromMemory().getNumValues());
                      hashPartitions[i].getHashMapFromMemory().clear();
                    }
                  }
                  assert hybridHtContainer.getTotalInMemRowCount() == 0;
    + }
    + }

    - for (int i = 0; i < hashPartitions.length; i++) {
    - if (hashPartitions[i].isHashMapOnDisk()) {
    - // Recursively process on-disk triplets (hash partition, sidefile, matchfile)
    - try {
    - hybridMapJoinLeftover = true;
    - hashMapRowGetters[smallTable] = null;
    - continueProcess(hashPartitions[i], hybridHtContainer);
    - } catch (IOException e) {
    - e.printStackTrace();
    - } catch (ClassNotFoundException e) {
    - e.printStackTrace();
    - } catch (SerDeException e) {
    - e.printStackTrace();
    - }
    - }
    - hybridMapJoinLeftover = false;
    - currentSmallTable = null;
    + // Reprocess the spilled data
    + for (int i = 0; i < numPartitions; i++) {
    + HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
    + if (hashPartitions[i].isHashMapOnDisk()) {
    + try {
    + continueProcess(i); // Re-process spilled data
    + } catch (IOException e) {
    + e.printStackTrace();
    + } catch (SerDeException e) {
    + e.printStackTrace();
    + } catch (ClassNotFoundException e) {
    + e.printStackTrace();
    + }
    + for (byte pos = 0; pos < order.length; pos++) {
    + if (pos != conf.getPosBigTable())
    + spilledMapJoinTables[pos] = null;
                  }
                }
              }
    @@ -497,18 +518,20 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
        }

        /**
    - * Continue processing each pair of spilled hashtable and big table row container
    - * @param partition hash partition to process
    - * @param hybridHtContainer Hybrid hashtable container
    + * Continue processing join between spilled hashtable(s) and spilled big table
    + * @param partitionId the partition number across all small tables to process
         * @throws HiveException
         * @throws IOException
    - * @throws ClassNotFoundException
         * @throws SerDeException
         */
    - private void continueProcess(HashPartition partition, HybridHashTableContainer hybridHtContainer)
    - throws HiveException, IOException, ClassNotFoundException, SerDeException {
    - reloadHashTable(partition, hybridHtContainer);
    - reProcessBigTable(partition);
    + private void continueProcess(int partitionId)
    + throws HiveException, IOException, SerDeException, ClassNotFoundException {
    + for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    + if (pos != conf.getPosBigTable()) {
    + reloadHashTable(pos, partitionId);
    + }
    + }
    + reProcessBigTable(partitionId);
        }

        /**
    @@ -516,16 +539,16 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
         * It can have two steps:
         * 1) Deserialize a serialized hash table, and
         * 2) Merge every key/value pair from small table container into the hash table
    - * @param partition hash partition to process
    - * @param hybridHtContainer Hybrid hashtable container
    + * @param pos position of small table
    + * @param partitionId the partition of the small table to be reloaded from
         * @throws IOException
    - * @throws ClassNotFoundException
         * @throws HiveException
         * @throws SerDeException
         */
    - protected void reloadHashTable(HashPartition partition,
    - HybridHashTableContainer hybridHtContainer)
    - throws IOException, ClassNotFoundException, HiveException, SerDeException {
    + protected void reloadHashTable(byte pos, int partitionId)
    + throws IOException, HiveException, SerDeException, ClassNotFoundException {
    + HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
    + HashPartition partition = container.getHashPartitions()[partitionId];

          // Merge the sidefile into the newly created hash table
          // This is where the spilling may happen again
    @@ -544,11 +567,12 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
          // If based on the new key count, keyCount is smaller than a threshold,
          // then just load the entire restored hashmap into memory.
          // The size of deserialized partition shouldn't exceed half of memory limit
    - if (rowCount * hybridHtContainer.getTableRowSize() >= hybridHtContainer.getMemoryThreshold() / 2) {
    - LOG.info("Hybrid Grace Hash Join: Hash table reload can fail since it will be greater than memory limit. Recursive spilling is currently not supported");
    + if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
    + LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" +
    + " will be greater than memory limit. Recursive spilling is currently not supported");
          }

    - KeyValueHelper writeHelper = hybridHtContainer.getWriteHelper();
    + KeyValueHelper writeHelper = container.getWriteHelper();
          while (kvContainer.hasNext()) {
            ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
            Writable key = pair.getFirst();
    @@ -557,27 +581,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
            restoredHashMap.put(writeHelper, -1);
          }

    - hybridHtContainer.setTotalInMemRowCount(hybridHtContainer.getTotalInMemRowCount()
    + container.setTotalInMemRowCount(container.getTotalInMemRowCount()
              + restoredHashMap.getNumValues() + kvContainer.size());
          kvContainer.clear();

    - // Since there's only one hashmap to deal with, it's OK to create a MapJoinBytesTableContainer
    - currentSmallTable = new MapJoinBytesTableContainer(restoredHashMap);
    - currentSmallTable.setInternalValueOi(hybridHtContainer.getInternalValueOi());
    - currentSmallTable.setSortableSortOrders(hybridHtContainer.getSortableSortOrders());
    + spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
    + spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
    + spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
        }

        /**
         * Iterate over the big table row container and feed process() with leftover rows
    - * @param partition the hash partition being brought back to memory at the moment
    + * @param partitionId the partition from which to take out spilled big table rows
         * @throws HiveException
    - * @throws IOException
         */
    - protected void reProcessBigTable(HashPartition partition) throws HiveException, IOException {
    + protected void reProcessBigTable(int partitionId) throws HiveException {
    + // For binary join, firstSmallTable is the only small table; it holds the reference to the
    + // spilled big table rows.
    + // For n-way join, big table rows are spilled only once, while processing the first small
    + // table, so only the firstSmallTable has a reference to the spilled big table rows.
    + HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
          ObjectContainer bigTable = partition.getMatchfileObjContainer();
          while (bigTable.hasNext()) {
            Object row = bigTable.next();
    - process(row, tag);
    + process(row, conf.getPosBigTable());
          }
          bigTable.clear();
        }
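
    Pulled out of the diff noise, the new closeOp() flow reduces to: for each
    spilled partition id, reload that partition of every small table, then
    replay the big table rows that the first small table recorded for it. The
    key point is that a big table row is spilled once per row, not once per
    small table. A toy model of that replay with plain collections (no Hive
    types; partition counts and data are made up for illustration):

        import java.util.ArrayList;
        import java.util.Arrays;
        import java.util.HashMap;
        import java.util.List;
        import java.util.Map;

        // Toy model of the replay above: a big-table row whose key hashes to a
        // spilled partition is buffered ONCE and joined against every small table
        // after the "reload" phase, mirroring spillBigTableRow()/reProcessBigTable().
        public class GraceReplayToy {
          static final int NUM_PARTITIONS = 4;
          static final int SPILLED = 1; // pretend partition 1 went to disk

          static int partition(String key) {
            return Math.abs(key.hashCode()) % NUM_PARTITIONS;
          }

          public static void main(String[] args) {
            // two "small tables" with the same keys
            List<Map<String, String>> smallTables = new ArrayList<>();
            for (int t = 0; t < 2; t++) {
              Map<String, String> table = new HashMap<>();
              table.put("a", "t" + t + ":a");
              table.put("b", "t" + t + ":b");
              smallTables.add(table);
            }

            List<String> spilledBigRows = new ArrayList<>(); // matchfile stand-in
            for (String key : Arrays.asList("a", "b", "a")) {
              if (partition(key) == SPILLED) {
                spilledBigRows.add(key); // spilled once, however many small tables
              } else {
                join(smallTables, key);  // in-memory pass
              }
            }
            // "reload" phase: the spilled partition is back in memory, replay rows
            for (String key : spilledBigRows) {
              join(smallTables, key);
            }
          }

          static void join(List<Map<String, String>> tables, String key) {
            StringBuilder row = new StringBuilder(key);
            for (Map<String, String> table : tables) {
              row.append(" x ").append(table.get(key));
            }
            System.out.println(row);
          }
        }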

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
    index 96a6728..abf38e4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
    @@ -72,7 +72,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
        @Override
        public void load(
            MapJoinTableContainer[] mapJoinTables,
    - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException {
    + MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {

          String currentInputPath = context.getCurrentInputPath().toString();
          LOG.info("******* Load from HashTable for input file: " + currentInputPath);

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
    index dd5c621..2ba622e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
    @@ -566,6 +566,7 @@ public final class BytesBytesMultiHashMap {
          this.writeBuffers.clear();
          this.refs = new long[1];
          this.keysAssigned = 0;
    + this.numValues = 0;
        }

        public void expandAndRehashToTarget(int estimateNewRowCount) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
    new file mode 100644
    index 0000000..625038f
    --- /dev/null
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
    @@ -0,0 +1,86 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.exec.persistence;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +/**
    + * This conf class wraps a list of HybridHashTableContainers and some common info shared
    + * among them; it is used in n-way joins, where multiple small tables are involved.
    + */
    +public class HybridHashTableConf {
    + private List<HybridHashTableContainer> loadedContainerList; // A list of already loaded containers
    + private int numberOfPartitions = 0; // Number of partitions each table should have
    + private int nextSpillPartition = -1; // The partition to be spilled next
    +
    + public HybridHashTableConf() {
    + loadedContainerList = new ArrayList<HybridHashTableContainer>();
    + }
    +
    + public int getNumberOfPartitions() {
    + return numberOfPartitions;
    + }
    +
    + public void setNumberOfPartitions(int numberOfPartitions) {
    + this.numberOfPartitions = numberOfPartitions;
    + this.nextSpillPartition = numberOfPartitions - 1;
    + }
    +
    + public int getNextSpillPartition() {
    + return this.nextSpillPartition;
    + }
    +
    + public void setNextSpillPartition(int nextSpillPartition) {
    + this.nextSpillPartition = nextSpillPartition;
    + }
    +
    +
    + public List<HybridHashTableContainer> getLoadedContainerList() {
    + return loadedContainerList;
    + }
    +
    + /**
    + * Spill one in-memory partition from tail for all previously loaded HybridHashTableContainers.
    + * Also mark that partition number as spill-on-creation for future created containers.
    + * @return amount of memory freed; 0 if only the last partition remains in memory for each container
    + */
    + public long spill() throws IOException {
    + if (nextSpillPartition == 0) {
    + return 0;
    + }
    + long memFreed = 0;
    + for (HybridHashTableContainer container : loadedContainerList) {
    + memFreed += container.spillPartition(nextSpillPartition);
    + container.setSpill(true);
    + }
    + nextSpillPartition--;
    + return memFreed;
    + }
    +
    + /**
    + * Check if a partition should be spilled directly on creation
    + * @param partitionId the partition to create
    + * @return true if it should be spilled directly, false otherwise
    + */
    + public boolean doSpillOnCreation(int partitionId) {
    + return nextSpillPartition != -1 && partitionId > nextSpillPartition;
    + }
    +}
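
    The spill policy is easiest to see in isolation: partitions are spilled from
    the tail (highest id first), the shared nextSpillPartition cursor moves down,
    and containers created after a spill put those partitions straight on disk
    via doSpillOnCreation(). A small driver exercising only the cursor
    arithmetic of the class as written above (no containers are loaded, so
    spill() frees no memory and merely advances the cursor); the class name
    NwayConfDemo is illustrative:

        import java.io.IOException;
        import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;

        public class NwayConfDemo {
          public static void main(String[] args) throws IOException {
            HybridHashTableConf nwayConf = new HybridHashTableConf();
            nwayConf.setNumberOfPartitions(4);           // cursor starts at partition 3

            System.out.println(nwayConf.doSpillOnCreation(3)); // false: 3 is the cursor
            nwayConf.spill();                            // empty container list, so this
                                                         // only moves the cursor to 2
            System.out.println(nwayConf.doSpillOnCreation(3)); // true: 3 > cursor(2)
            System.out.println(nwayConf.getNextSpillPartition()); // 2
          }
        }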

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    index cb9083d..3f6d61e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    @@ -72,15 +72,18 @@ public class HybridHashTableContainer
        private static final Log LOG = LogFactory.getLog(HybridHashTableContainer.class);

        private final HashPartition[] hashPartitions; // an array of partitions holding the triplets
    - private int totalInMemRowCount = 0; // total number of small table rows in memory
    - private final long memoryThreshold; // the max memory limit allocated
    + private int totalInMemRowCount = 0; // total number of small table rows in memory
    + private long memoryThreshold; // the max memory limit that can be allocated
    + private long memoryUsed; // the actual memory used
    + private int writeBufferSize; // write buffer size for this HybridHashTableContainer
        private final long tableRowSize; // row size of the small table
    - private boolean isSpilled; // whether there's any spilled partition
    - private int toSpillPartitionId; // the partition into which to spill the big table row;
    - // This may change after every setMapJoinKey call
    - private int numPartitionsSpilled; // number of spilled partitions
    - private boolean lastPartitionInMem; // only one (last one) partition is left in memory
    + private boolean isSpilled; // whether there's any spilled partition
    + private int toSpillPartitionId; // the partition into which to spill the big table row;
    + // This may change after every setMapJoinKey call
    + private int numPartitionsSpilled; // number of spilled partitions
    + private boolean lastPartitionInMem; // only one (last one) partition is left in memory
        private final int memoryCheckFrequency; // how often (# of rows apart) to check if memory is full
    + private HybridHashTableConf nwayConf; // configuration for n-way join

        /** The OI used to deserialize values. We never deserialize keys. */
        private LazyBinaryStructObjectInspector internalValueOi;
    @@ -182,53 +185,93 @@ public class HybridHashTableContainer
          }
        }

    - public HybridHashTableContainer(Configuration hconf, long keyCount, long memUsage, long tableSize)
    - throws SerDeException {
    + public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
    + long estimatedTableSize, HybridHashTableConf nwayConf)
    + throws SerDeException, IOException {
          this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
               HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
               HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
    - HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
    - HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD),
               HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
    - tableSize, keyCount, memUsage);
    + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
    + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
    + estimatedTableSize, keyCount, memoryAvailable, nwayConf);
        }

    - private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, int wbSize,
    - long noConditionalTaskThreshold, int memCheckFreq, long tableSize,
    - long keyCount, long memUsage) throws SerDeException {
    -
    - if (wbSize > noConditionalTaskThreshold) {
    - LOG.warn("adjusting hash table write buffer size to be smaller than noconditionaltasksize");
    - wbSize = (int) noConditionalTaskThreshold;
    - }
    -
    + private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor,
    + int memCheckFreq, int minWbSize, int minNumParts,
    + long estimatedTableSize, long keyCount,
    + long memoryAvailable, HybridHashTableConf nwayConf)
    + throws SerDeException, IOException {
          directWriteHelper = new MapJoinBytesTableContainer.DirectKeyValueWriter();

          int newKeyCount = HashMapWrapper.calculateTableSize(
              keyCountAdj, threshold, loadFactor, keyCount);

    - memoryThreshold = noConditionalTaskThreshold;
    - tableRowSize = tableSize / newKeyCount;
    + memoryThreshold = memoryAvailable;
    + tableRowSize = estimatedTableSize / keyCount;
          memoryCheckFrequency = memCheckFreq;

    - int numPartitions = calcNumPartitions(tableSize, wbSize); // estimate # of partitions to create
    + this.nwayConf = nwayConf;
    + int numPartitions;
    + if (nwayConf == null) { // binary join
    + numPartitions = calcNumPartitions(memoryThreshold, estimatedTableSize, minNumParts, minWbSize,
    + nwayConf);
    + writeBufferSize = (int)(estimatedTableSize / numPartitions);
    + } else { // n-way join
+ // It has already been calculated in HashTableLoader, so we just need to retrieve that number
    + numPartitions = nwayConf.getNumberOfPartitions();
    + if (nwayConf.getLoadedContainerList().size() == 0) { // n-way: first small table
    + writeBufferSize = (int)(estimatedTableSize / numPartitions);
    + } else { // n-way: all later small tables
    + while (memoryThreshold < numPartitions * minWbSize) {
    + // Spill previously loaded tables to make more room
    + long memFreed = nwayConf.spill();
    + if (memFreed == 0) {
    + LOG.warn("Available memory is not enough to create HybridHashTableContainers" +
    + " consistently!");
    + break;
    + } else {
    + LOG.info("Total available memory was: " + memoryThreshold);
    + memoryThreshold += memFreed;
    + LOG.info("Total available memory is: " + memoryThreshold);
    + }
    + }
    + writeBufferSize = (int)(memoryThreshold / numPartitions);
    + }
    + }
    + writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
    + LOG.info("Write buffer size: " + writeBufferSize);
          hashPartitions = new HashPartition[numPartitions];
          int numPartitionsSpilledOnCreation = 0;
    - long memoryAllocated = 0;
    + memoryUsed = 0;
          int initialCapacity = Math.max(newKeyCount / numPartitions, threshold / numPartitions);
          for (int i = 0; i < numPartitions; i++) {
    - if (i == 0) { // We unconditionally create a hashmap for the first hash partition
    - hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, true);
    - } else {
    - hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage,
    - memoryAllocated + wbSize < memoryThreshold);
    + if (this.nwayConf == null || // binary join
    + nwayConf.getLoadedContainerList().size() == 0) { // n-way join, first (biggest) small table
    + if (i == 0) { // We unconditionally create a hashmap for the first hash partition
    + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold, true);
    + } else {
    + hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold,
    + memoryUsed + writeBufferSize < memoryThreshold);
    + }
    + } else { // n-way join
+ // For all later small tables, follow the same pattern as the previously loaded tables.
    + if (this.nwayConf.doSpillOnCreation(i)) {
    + hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, false);
    + } else {
    + hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, true);
    + }
            }
    +
            if (isHashMapSpilledOnCreation(i)) {
              numPartitionsSpilledOnCreation++;
              numPartitionsSpilled++;
              this.setSpill(true);
    + if (this.nwayConf != null && this.nwayConf.getNextSpillPartition() == numPartitions - 1) {
    + this.nwayConf.setNextSpillPartition(i - 1);
    + }
            } else {
    - memoryAllocated += hashPartitions[i].hashMap.memorySize();
    + memoryUsed += hashPartitions[i].hashMap.memorySize();
            }
          }
          assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" +
    @@ -236,6 +279,11 @@ public class HybridHashTableContainer
          LOG.info("Number of partitions created: " + numPartitions);
          LOG.info("Number of partitions spilled directly to disk on creation: "
              + numPartitionsSpilledOnCreation);
    +
    + // Append this container to the loaded list
    + if (this.nwayConf != null) {
    + this.nwayConf.getLoadedContainerList().add(this);
    + }
        }
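For the n-way branch in the constructor above: when a later small table cannot fit even numPartitions minimum-size write buffers, it keeps calling nwayConf.spill() to reclaim memory from previously loaded tables until the buffers fit or nothing more can be freed. A standalone sketch of that loop, with hypothetical numbers standing in for real container sizes:

    public class NwayRoomSketch {
      public static void main(String[] args) {
        long memoryThreshold = 10L << 20;  // 10 MB currently available
        int numPartitions = 16;
        int minWbSize = 1 << 20;           // 1 MB minimum write buffer
        // Pretend each spill of previously loaded tables frees 4 MB, three times.
        long[] freeable = {4L << 20, 4L << 20, 4L << 20};
        int next = 0;
        while (memoryThreshold < (long) numPartitions * minWbSize) {
          long memFreed = next < freeable.length ? freeable[next++] : 0;
          if (memFreed == 0) {
            System.out.println("not enough memory to keep spilling; giving up");
            break;
          }
          memoryThreshold += memFreed;     // spilled memory becomes available again
        }
        System.out.println("memory threshold now " + memoryThreshold);
      }
    }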


    @@ -251,6 +299,20 @@ public class HybridHashTableContainer
          return memoryThreshold;
        }

    + /**
    + * Get the current memory usage by recalculating it.
    + * @return current memory usage
    + */
    + public long refreshMemoryUsed() {
    + long memUsed = 0;
    + for (HashPartition hp : hashPartitions) {
    + if (hp.hashMap != null) {
    + memUsed += hp.hashMap.memorySize();
    + }
    + }
    + return memoryUsed = memUsed;
    + }
    +
        public LazyBinaryStructObjectInspector getInternalValueOi() {
          return internalValueOi;
        }
    @@ -313,10 +375,16 @@ public class HybridHashTableContainer
                  LOG.warn("This LAST partition in memory won't be spilled!");
                  lastPartitionInMem = true;
                } else {
    - int biggest = biggestPartition();
    + if (nwayConf == null) { // binary join
    + int biggest = biggestPartition();
    + spillPartition(biggest);
    + this.setSpill(true);
    + } else { // n-way join
    + LOG.info("N-way spilling: spill tail partition from previously loaded small tables");
    + memoryThreshold += nwayConf.spill();
    + LOG.info("Memory threshold has been increased to: " + memoryThreshold);
    + }
                  numPartitionsSpilled++;
    - spillPartition(biggest);
    - this.setSpill(true);
                }
              }
            }
    @@ -349,13 +417,7 @@ public class HybridHashTableContainer
         * @return true if memory is full, false if not
         */
        private boolean isMemoryFull() {
    - long size = 0;
    - for (int i = 0; i < hashPartitions.length; i++) {
    - if (!isOnDisk(i)) {
    - size += hashPartitions[i].hashMap.memorySize();
    - }
    - }
    - return size >= memoryThreshold;
    + return refreshMemoryUsed() >= memoryThreshold;
        }

        /**
    @@ -385,11 +447,11 @@ public class HybridHashTableContainer
        /**
         * Move the hashtable of a specified partition from memory into local file system
         * @param partitionId the hashtable to be moved
    + * @return amount of memory freed
         */
    - private void spillPartition(int partitionId) throws IOException {
    + public long spillPartition(int partitionId) throws IOException {
          HashPartition partition = hashPartitions[partitionId];
          int inMemRowCount = partition.hashMap.getNumValues();
    - long inMemSize = partition.hashMap.memorySize();

          Path path = Files.createTempFile("partition-" + partitionId + "-", null);
          OutputStream outputStream = Files.newOutputStream(path);
    @@ -403,57 +465,55 @@ public class HybridHashTableContainer
          partition.hashMapLocalPath = path;
          partition.hashMapOnDisk = true;

    - long size = 0;
    - for (int i = 0; i < hashPartitions.length; i++) {
    - if (!isOnDisk(i)) {
    - size += hashPartitions[i].hashMap.memorySize();
    - }
    - }
          LOG.info("Spilling hash partition " + partitionId + " (Rows: " + inMemRowCount +
    - ", Mem size: " + inMemSize + "): " + path);
    - LOG.info("Memory usage before spilling: " + size);
    - LOG.info("Memory usage after spilling: " + (size - inMemSize));
    + ", Mem size: " + partition.hashMap.memorySize() + "): " + path);
    + LOG.info("Memory usage before spilling: " + memoryUsed);
    +
    + long memFreed = partition.hashMap.memorySize();
    + memoryUsed -= memFreed;
    + LOG.info("Memory usage after spilling: " + memoryUsed);

          totalInMemRowCount -= inMemRowCount;
          partition.hashMap.clear();
    + return memFreed;
        }

        /**
    - * Calculate how many partitions are needed. This is an estimation.
    + * Calculate how many partitions are needed.
    + * For n-way join, we only do this calculation once in the HashTableLoader, for the biggest small
    + * table. Other small tables will use the same number. They may need to adjust (usually reduce)
+ * their individual write buffer size in order not to exceed the memory threshold.
    + * @param memoryThreshold memory threshold for the given table
         * @param dataSize total data size for the table
    - * @param wbSize write buffer size
    + * @param minNumParts minimum required number of partitions
    + * @param minWbSize minimum required write buffer size
    + * @param nwayConf the n-way join configuration
         * @return number of partitions needed
         */
    - private int calcNumPartitions(long dataSize, int wbSize) {
    - if (memoryThreshold < wbSize) {
    - throw new IllegalStateException("Available memory is less than hashtable writebuffer size!"
    - + " Try increasing hive.auto.convert.join.noconditionaltask.size.");
    - }
    -
    - int lowerLimit = 2;
    - int numPartitions = (int) Math.ceil(dataSize / wbSize);
    -
    - LOG.info("Total available memory: " + memoryThreshold);
    - LOG.info("Estimated small table size: " + dataSize);
    - LOG.info("Write buffer size: " + wbSize);
    - LOG.info("Initial number of partitions: " + numPartitions);
    + public static int calcNumPartitions(long memoryThreshold, long dataSize, int minNumParts,
    + int minWbSize, HybridHashTableConf nwayConf) throws IOException {
    + int numPartitions = minNumParts;

    - if (numPartitions < lowerLimit) {
    - return lowerLimit;
    - } else if (dataSize > memoryThreshold) {
    - numPartitions = (int) (memoryThreshold / wbSize);
    + if (memoryThreshold < minNumParts * minWbSize) {
    + LOG.warn("Available memory is not enough to create a HybridHashTableContainer!");
          }
    - // Make sure numPartitions is power of 2, to make N & (M - 1) easy when calculating partition No.
    - numPartitions = (Long.bitCount(numPartitions) == 1) ? numPartitions
    - : Integer.highestOneBit(numPartitions) << 1;
    - while (dataSize / numPartitions > memoryThreshold) {
    - numPartitions *= 2;
    + if (memoryThreshold < dataSize) {
    + while (dataSize / numPartitions > memoryThreshold) {
    + numPartitions *= 2;
    + }
          }

    + LOG.info("Total available memory: " + memoryThreshold);
    + LOG.info("Estimated small table size: " + dataSize);
          LOG.info("Number of hash partitions to be created: " + numPartitions);
          return numPartitions;
        }

    + /* Get number of partitions */
    + public int getNumPartitions() {
    + return hashPartitions.length;
    + }
    +
        /* Get total number of rows from all in memory partitions */
        public int getTotalInMemRowCount() {
          return totalInMemRowCount;
    @@ -494,6 +554,7 @@ public class HybridHashTableContainer
              hp.hashMap.clear();
            }
          }
    + memoryUsed = 0;
        }

        @Override
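To make the new partition-count rule above concrete: start from the configured minimum and, only when the table is larger than available memory, keep doubling until each partition's share of the data fits under the threshold. A standalone sketch mirroring that arithmetic (logging and the n-way parameter omitted; illustration only, not the committed code):

    public class CalcPartitionsSketch {
      static int calcNumPartitions(long memoryThreshold, long dataSize, int minNumParts) {
        int numPartitions = minNumParts;
        if (memoryThreshold < dataSize) {
          while (dataSize / numPartitions > memoryThreshold) {
            numPartitions *= 2;   // double until each partition fits in memory
          }
        }
        return numPartitions;
      }

      public static void main(String[] args) {
        // 1 GB table, 10 MB threshold, minimum 16 partitions -> 128 partitions
        System.out.println(calcNumPartitions(10L << 20, 1L << 30, 16));
        // 1 GB table, 100 MB threshold -> the minimum of 16 is already enough
        System.out.println(calcNumPartitions(100L << 20, 1L << 30, 16));
      }
    }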

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
    index d3ec29a..d1bea48 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
    @@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.fs.FileUtil;
      import org.apache.hadoop.hive.common.ObjectPair;
      import org.apache.hadoop.hive.ql.io.HiveKey;
    +import org.apache.hadoop.hive.ql.metadata.HiveException;
      import org.apache.hadoop.io.BytesWritable;

      import java.io.File;
    @@ -64,29 +65,31 @@ public class KeyValueContainer {
          }
          try {
            setupOutput();
    - } catch (IOException e) {
    + } catch (IOException | HiveException e) {
            throw new RuntimeException("Failed to create temporary output file on disk", e);
          }
        }

    - private void setupOutput() throws IOException {
    - if (parentFile == null) {
    - parentFile = File.createTempFile("key-value-container", "");
    - if (parentFile.delete() && parentFile.mkdir()) {
    - parentFile.deleteOnExit();
    + private void setupOutput() throws IOException, HiveException {
    + FileOutputStream fos = null;
    + try {
    + if (parentFile == null) {
    + parentFile = File.createTempFile("key-value-container", "");
    + if (parentFile.delete() && parentFile.mkdir()) {
    + parentFile.deleteOnExit();
    + }
            }
    - }

    - if (tmpFile == null || input != null) {
    - tmpFile = File.createTempFile("KeyValueContainer", ".tmp", parentFile);
    - LOG.info("KeyValueContainer created temp file " + tmpFile.getAbsolutePath());
    - tmpFile.deleteOnExit();
    - }
    + if (tmpFile == null || input != null) {
    + tmpFile = File.createTempFile("KeyValueContainer", ".tmp", parentFile);
    + LOG.info("KeyValueContainer created temp file " + tmpFile.getAbsolutePath());
    + tmpFile.deleteOnExit();
    + }

    - FileOutputStream fos = null;
    - try {
            fos = new FileOutputStream(tmpFile);
            output = new Output(fos);
    + } catch (IOException e) {
    + throw new HiveException(e);
          } finally {
            if (output == null && fos != null) {
              fos.close();
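The restructured setupOutput above pulls all of the temp-file work into a single try so that an IOException is rethrown as HiveException, while the finally clause still closes the FileOutputStream when the Output wrapper was never constructed. A minimal standalone illustration of that close-on-failure idiom (hypothetical names, not Hive code):

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;

    public class CloseOnFailureSketch {
      static Object output;   // stands in for the Kryo Output field

      static void setup() throws Exception {
        FileOutputStream fos = null;
        try {
          File tmp = File.createTempFile("sketch", ".tmp");
          tmp.deleteOnExit();
          fos = new FileOutputStream(tmp);
          output = new Object();          // if this line threw, fos would otherwise leak
        } catch (IOException e) {
          throw new Exception("wrapped, as setupOutput wraps into HiveException", e);
        } finally {
          if (output == null && fos != null) {
            fos.close();                  // close only when the wrapper was never created
          }
        }
      }

      public static void main(String[] args) throws Exception {
        setup();
        System.out.println("output ready: " + (output != null));
      }
    }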

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
    index 18943dd..7d7ce1d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
    @@ -26,6 +26,7 @@ import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.fs.FileUtil;
      import org.apache.hadoop.hive.ql.exec.Utilities;
    +import org.apache.hadoop.hive.ql.metadata.HiveException;

      import java.io.File;
      import java.io.FileInputStream;
    @@ -67,29 +68,31 @@ public class ObjectContainer<ROW> {
          kryo = Utilities.runtimeSerializationKryo.get();
          try {
            setupOutput();
    - } catch (IOException e) {
    + } catch (IOException | HiveException e) {
            throw new RuntimeException("Failed to create temporary output file on disk", e);
          }
        }

    - private void setupOutput() throws IOException {
    - if (parentFile == null) {
    - parentFile = File.createTempFile("object-container", "");
    - if (parentFile.delete() && parentFile.mkdir()) {
    - parentFile.deleteOnExit();
    + private void setupOutput() throws IOException, HiveException {
    + FileOutputStream fos = null;
    + try {
    + if (parentFile == null) {
    + parentFile = File.createTempFile("object-container", "");
    + if (parentFile.delete() && parentFile.mkdir()) {
    + parentFile.deleteOnExit();
    + }
            }
    - }

    - if (tmpFile == null || input != null) {
    - tmpFile = File.createTempFile("ObjectContainer", ".tmp", parentFile);
    - LOG.info("ObjectContainer created temp file " + tmpFile.getAbsolutePath());
    - tmpFile.deleteOnExit();
    - }
    + if (tmpFile == null || input != null) {
    + tmpFile = File.createTempFile("ObjectContainer", ".tmp", parentFile);
    + LOG.info("ObjectContainer created temp file " + tmpFile.getAbsolutePath());
    + tmpFile.deleteOnExit();
    + }

    - FileOutputStream fos = null;
    - try {
            fos = new FileOutputStream(tmpFile);
            output = new Output(fos);
    + } catch (IOException e) {
    + throw new HiveException(e);
          } finally {
            if (output == null && fos != null) {
              fos.close();

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
    index fe108c4..043f1f7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
    @@ -69,7 +69,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable

        @Override
        public void load(MapJoinTableContainer[] mapJoinTables,
    - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
    + MapJoinTableContainerSerDe[] mapJoinTableSerdes)
            throws HiveException {

          // Note: it's possible that a MJ operator is in a ReduceWork, in which case the

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    index ba5a797..6a81f11 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
      import org.apache.hadoop.hive.ql.exec.MapredContext;
      import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
      import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
    +import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;
      import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
      import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
      import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
    @@ -69,7 +70,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable

        @Override
        public void load(MapJoinTableContainer[] mapJoinTables,
    - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
    + MapJoinTableContainerSerDe[] mapJoinTableSerdes)
            throws HiveException {

          Map<Integer, String> parentToInput = desc.getParentToInput();
    @@ -79,10 +80,44 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
              hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
          boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
          boolean isFirstKey = true;
    + // TODO remove this after memory manager is in
    + long noConditionalTaskThreshold = HiveConf.getLongVar(
    + hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    +
    + // Only applicable to n-way Hybrid Grace Hash Join
    + HybridHashTableConf nwayConf = null;
    + long totalSize = 0;
    + int biggest = 0; // position of the biggest small table
    + if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
    + // Create a Conf for n-way HybridHashTableContainers
    + nwayConf = new HybridHashTableConf();
    +
    + // Find the biggest small table; also calculate total data size of all small tables
    + long maxSize = 0; // the size of the biggest small table
    + for (int pos = 0; pos < mapJoinTables.length; pos++) {
    + if (pos == desc.getPosBigTable()) {
    + continue;
    + }
    + totalSize += desc.getParentDataSizes().get(pos);
    + biggest = desc.getParentDataSizes().get(pos) > maxSize ? pos : biggest;
    + maxSize = desc.getParentDataSizes().get(pos) > maxSize ? desc.getParentDataSizes().get(pos)
    + : maxSize;
    + }

    - // Disable hybrid grace hash join for n-way join
    - if (mapJoinTables.length > 2) {
    - useHybridGraceHashJoin = false;
    + // Using biggest small table, calculate number of partitions to create for each small table
    + float percentage = (float) maxSize / totalSize;
    + long memory = (long) (noConditionalTaskThreshold * percentage);
    + int numPartitions = 0;
    + try {
    + numPartitions = HybridHashTableContainer.calcNumPartitions(memory,
    + desc.getParentDataSizes().get(biggest),
    + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
    + HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
    + nwayConf);
    + } catch (IOException e) {
    + throw new HiveException(e);
    + }
    + nwayConf.setNumberOfPartitions(numPartitions);
          }

          for (int pos = 0; pos < mapJoinTables.length; pos++) {
    @@ -122,10 +157,21 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
              Long keyCountObj = parentKeyCounts.get(pos);
              long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();

    + long memory = 0;
    + if (useHybridGraceHashJoin) {
    + if (mapJoinTables.length > 2) {
    + // Allocate n-way join memory proportionally
    + float percentage = (float) desc.getParentDataSizes().get(pos) / totalSize;
    + memory = (long) (noConditionalTaskThreshold * percentage);
    + } else { // binary join
    + memory = noConditionalTaskThreshold;
    + }
    + }
    +
              MapJoinTableContainer tableContainer = useOptimizedTables
    - ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount, memUsage,
    - desc.getParentDataSizes().get(pos))
    - : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, memUsage))
    + ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount,
    + memory, desc.getParentDataSizes().get(pos), nwayConf)
    + : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0))
                  : new HashMapWrapper(hconf, keyCount);
              LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    index 534a906..0547346 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    @@ -311,8 +311,10 @@ public class VectorMapJoinOperator extends MapJoinOperator implements Vectorizat
        }

        @Override
    - protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
    + protected void reProcessBigTable(int partitionId)
            throws HiveException {
    +
    + HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
          ObjectContainer bigTable = partition.getMatchfileObjContainer();

          DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    index f272b6d..f9d5736 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    @@ -744,10 +744,6 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
          // Setup a scratch batch that will be used to play back big table rows that were spilled
          // to disk for the Hybrid Grace hash partitioning.
          spillReplayBatch = VectorizedBatchUtil.makeLike(batch);
    -
    - // TEMPORARY -- Set this up for Hybrid Grace logic in MapJoinOperator.closeOp
    - hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
    - smallTable = posSingleVectorMapJoinSmallTable;
        }

        protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) {

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    index 743a975..70c8cb1 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    @@ -27,6 +27,8 @@ import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
      import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
    +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
    +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
      import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
      import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
      import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
    @@ -449,7 +451,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC

          int partitionId = hashTableResult.spillPartitionId();

    - HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[smallTable];
    + HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
          HashPartition hp = ht.getHashPartitions()[partitionId];

          VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer();
    @@ -499,27 +501,30 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
        }

        @Override
    - protected void reloadHashTable(HashPartition partition,
    - HybridHashTableContainer hybridHtContainer)
    - throws IOException, ClassNotFoundException, HiveException, SerDeException {
    + protected void reloadHashTable(byte pos, int partitionId)
    + throws IOException, HiveException, SerDeException, ClassNotFoundException {

    - // The super method will reload a hash table partition and
    - // put a single MapJoinBytesTableContainer into the currentSmallTable member.
    - super.reloadHashTable(partition, hybridHtContainer);
    + // The super method will reload a hash table partition of one of the small tables.
+ // Currently, for native vector map join, there will only be one small table.
    + super.reloadHashTable(pos, partitionId);
    +
    + MapJoinTableContainer smallTable = spilledMapJoinTables[pos];

          vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf,
    - currentSmallTable);
    + smallTable);
          needHashTableSetup = true;

          LOG.info(CLASS_NAME + " reloadHashTable!");
        }

        @Override
    - protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
    - throws HiveException, IOException {
    + protected void reProcessBigTable(int partitionId)
    + throws HiveException {

          LOG.info(CLASS_NAME + " reProcessBigTable enter...");

    + HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
    +
          int rowCount = 0;
          int batchCount = 0;


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
    index 0796406..f9550c9 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
    @@ -90,8 +90,8 @@ public abstract class VectorMapJoinFastBytesHashMap
        }

        public VectorMapJoinFastBytesHashMap(
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);

          valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);


    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
    index d685c22..9dcaf8f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
    @@ -85,8 +85,8 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
        }

        public VectorMapJoinFastBytesHashMultiSet(
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);

          keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
    index 9f20fdc..9f122c4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
    @@ -77,8 +77,8 @@ public abstract class VectorMapJoinFastBytesHashSet
        }

        public VectorMapJoinFastBytesHashSet(
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);

          keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
    index 594a77f..b6e6321 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
    @@ -214,8 +214,8 @@ public abstract class VectorMapJoinFastBytesHashTable
        }

        public VectorMapJoinFastBytesHashTable(
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
          allocateBucketArray();
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
    index b37247c..262b619 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
    @@ -32,7 +32,7 @@ public abstract class VectorMapJoinFastHashMap

        public VectorMapJoinFastHashMap(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
    index 5569f6e..5f7c6a7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
    @@ -42,7 +42,7 @@ public abstract class VectorMapJoinFastHashMultiSet

        public VectorMapJoinFastHashMultiSet(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
    index 0738df3..8509971 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
    @@ -38,7 +38,7 @@ public abstract class VectorMapJoinFastHashSet

        public VectorMapJoinFastHashSet(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
    index 33e34fa..fbe6b4c 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
    @@ -30,7 +30,6 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab

        protected float loadFactor;
        protected int writeBuffersSize;
    - protected long memUsage;

        protected int metricPutConflict;
        protected int largestNumberOfSteps;
    @@ -52,7 +51,7 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
        }

        public VectorMapJoinFastHashTable(
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    + int initialCapacity, float loadFactor, int writeBuffersSize) {

          initialCapacity = (Long.bitCount(initialCapacity) == 1)
              ? initialCapacity : nextHighestPowerOfTwo(initialCapacity);
    @@ -65,6 +64,5 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab

          this.loadFactor = loadFactor;
          this.writeBuffersSize = writeBuffersSize;
    - this.memUsage = memUsage;
        }
      }
    \ No newline at end of file
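Context for the constructor above: initialCapacity is rounded up to a power of two so bucket indices can be computed with hash & (capacity - 1) instead of a modulo. A standalone sketch of that rounding (this nextHighestPowerOfTwo is written for illustration; the real helper in VectorMapJoinFastHashTable may differ in detail):

    public class PowerOfTwoSketch {
      static int nextHighestPowerOfTwo(int v) {
        if (v <= 1) {
          return 1;
        }
        return Integer.highestOneBit(v - 1) << 1;
      }

      public static void main(String[] args) {
        int initialCapacity = 1000;
        if (Long.bitCount(initialCapacity) != 1) {   // already a power of two? keep it
          initialCapacity = nextHighestPowerOfTwo(initialCapacity);
        }
        // With a power-of-two capacity, slot = hash & (capacity - 1).
        System.out.println(initialCapacity);         // prints 1024
      }
    }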

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
    index 92b5d40..4edf604 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
    @@ -62,7 +62,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.

        @Override
        public void load(MapJoinTableContainer[] mapJoinTables,
    - MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
    + MapJoinTableContainerSerDe[] mapJoinTableSerdes)
            throws HiveException {

          Map<Integer, String> parentToInput = desc.getParentToInput();
    @@ -91,7 +91,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
              long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();

              VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer =
    - new VectorMapJoinFastTableContainer(desc, hconf, keyCount, memUsage);
    + new VectorMapJoinFastTableContainer(desc, hconf, keyCount);

              while (kvReader.next()) {
                vectorMapJoinFastTableContainer.putRow(

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
    index 3a0b380..d6ad028 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
    @@ -86,9 +86,9 @@ public class VectorMapJoinFastLongHashMap

        public VectorMapJoinFastLongHashMap(
            boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
          super(minMaxEnabled, isOuterJoin, hashTableKeyType,
    - initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + initialCapacity, loadFactor, writeBuffersSize);
          valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
    index f9763e3..e447551 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
    @@ -84,8 +84,8 @@ public class VectorMapJoinFastLongHashMultiSet

        public VectorMapJoinFastLongHashMultiSet(
            boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
          super(minMaxEnabled, isOuterJoin, hashTableKeyType,
    - initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
    index cd23949..aa44e60 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
    @@ -77,8 +77,8 @@ public class VectorMapJoinFastLongHashSet

        public VectorMapJoinFastLongHashSet(
            boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
          super(minMaxEnabled, isOuterJoin, hashTableKeyType,
    - initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
    index b448e1f..2137fb7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
    @@ -270,8 +270,8 @@ public abstract class VectorMapJoinFastLongHashTable

        public VectorMapJoinFastLongHashTable(
              boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
          this.isOuterJoin = isOuterJoin;
          this.hashTableKeyType = hashTableKeyType;
          PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.longTypeInfo };

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
    index b962475..9a9fb8d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
    @@ -28,12 +28,12 @@ public class VectorMapJoinFastMultiKeyHashMap

        @VisibleForTesting
        public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) {
    - this(false, initialCapacity, loadFactor, wbSize, -1);
    + this(false, initialCapacity, loadFactor, wbSize);
        }

        public VectorMapJoinFastMultiKeyHashMap(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
    index 71a62fe..a8744a5 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
    @@ -26,7 +26,7 @@ public class VectorMapJoinFastMultiKeyHashMultiSet

        public VectorMapJoinFastMultiKeyHashMultiSet(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
    index dad3b32..a8048e5 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
    @@ -26,7 +26,7 @@ public class VectorMapJoinFastMultiKeyHashSet

        public VectorMapJoinFastMultiKeyHashSet(
              boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
    index c80ea89..6f181b2 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
    @@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMa

        public VectorMapJoinFastStringHashMap(
            boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
          stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
    index 4933b16..9653b71 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
    @@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesH

        public VectorMapJoinFastStringHashMultiSet(
            boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
          stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
    index ae8d943..6419a0b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
    @@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSe

        public VectorMapJoinFastStringHashSet(
            boolean isOuterJoin,
    - int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
    - super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
    + int initialCapacity, float loadFactor, int writeBuffersSize) {
    + super(initialCapacity, loadFactor, writeBuffersSize);
          stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
        }
      }
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
    index 3789275..373b5f4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
    @@ -56,13 +56,12 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
        private float loadFactor;
        private int wbSize;
        private long keyCount;
    - private long memUsage;


        private VectorMapJoinFastHashTable VectorMapJoinFastHashTable;

        public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf,
    - long keyCount, long memUsage) throws SerDeException {
    + long keyCount) throws SerDeException {

          this.desc = desc;
          this.hconf = hconf;
    @@ -73,13 +72,11 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
          wbSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE);

          this.keyCount = keyCount;
    - this.memUsage = memUsage;

          // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj);
          // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold);
          // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor);
          // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize);
    - // LOG.info("VectorMapJoinFastTableContainer load memUsage " + memUsage);

          int newThreshold = HashMapWrapper.calculateTableSize(
              keyCountAdj, threshold, loadFactor, keyCount);
    @@ -117,17 +114,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
            case HASH_MAP:
              hashTable = new VectorMapJoinFastLongHashMap(
                      minMaxEnabled, isOuterJoin, hashTableKeyType,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_MULTISET:
              hashTable = new VectorMapJoinFastLongHashMultiSet(
                      minMaxEnabled, isOuterJoin, hashTableKeyType,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_SET:
              hashTable = new VectorMapJoinFastLongHashSet(
                      minMaxEnabled, isOuterJoin, hashTableKeyType,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            }
            break;
    @@ -137,17 +134,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
            case HASH_MAP:
              hashTable = new VectorMapJoinFastStringHashMap(
                      isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_MULTISET:
              hashTable = new VectorMapJoinFastStringHashMultiSet(
                      isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_SET:
              hashTable = new VectorMapJoinFastStringHashSet(
                      isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            }
            break;
    @@ -157,17 +154,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
            case HASH_MAP:
              hashTable = new VectorMapJoinFastMultiKeyHashMap(
                  isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_MULTISET:
              hashTable = new VectorMapJoinFastMultiKeyHashMultiSet(
                      isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            case HASH_SET:
              hashTable = new VectorMapJoinFastMultiKeyHashSet(
                      isOuterJoin,
    - newThreshold, loadFactor, writeBufferSize, memUsage);
    + newThreshold, loadFactor, writeBufferSize);
              break;
            }
            break;
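
    For context on the signature change above: after this commit the fast hash-table constructors no longer take a memUsage argument, so a table is sized purely from the threshold, load factor, and write-buffer size that VectorMapJoinFastTableContainer computes. A minimal construction sketch, assuming the hive-exec jar built from this branch is on the classpath; the sketch class name and the numeric values are illustrative stand-ins, not Hive's configured defaults.

        import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastStringHashMap;

        public class FastHashMapConstructionSketch {
          public static void main(String[] args) {
            // Stand-ins for what the container derives from HiveConf and keyCount
            // (HIVEHASHTABLELOADFACTOR, HIVEHASHTABLEWBSIZE, calculateTableSize).
            int newThreshold = 1 << 20;
            float loadFactor = 0.75f;
            int writeBufferSize = 8 * 1024 * 1024;
            // The fourth (memUsage) argument is gone after this commit:
            VectorMapJoinFastStringHashMap hashMap = new VectorMapJoinFastStringHashMap(
                false /* isOuterJoin */, newThreshold, loadFactor, writeBufferSize);
            System.out.println("constructed " + hashMap.getClass().getSimpleName());
          }
        }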
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10520: LLAP: Must reset small table result columns for Native Vectorization of Map Join (Matt McCline via Jason Dere)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/61176b10
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/61176b10
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/61176b10

    Branch: refs/heads/llap
    Commit: 61176b10cfdd1a50cbab12c4913622d8941606a8
    Parents: c72d073
    Author: Jason Dere <jdere@hortonworks.com>
    Authored: Fri May 1 11:24:49 2015 -0700
    Committer: Jason Dere <jdere@hortonworks.com>
    Committed: Fri May 1 11:24:49 2015 -0700

    ----------------------------------------------------------------------
      .../mapjoin/VectorMapJoinCommonOperator.java | 7 +++++
      .../VectorMapJoinGenerateResultOperator.java | 4 +--
      .../VectorMapJoinInnerBigOnlyLongOperator.java | 5 ++++
      ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 5 ++++
      ...VectorMapJoinInnerBigOnlyStringOperator.java | 5 ++++
      ...ectorMapJoinInnerGenerateResultOperator.java | 15 ++++++++++
      .../mapjoin/VectorMapJoinInnerLongOperator.java | 4 +++
      .../VectorMapJoinInnerMultiKeyOperator.java | 4 +++
      .../VectorMapJoinInnerStringOperator.java | 4 +++
      .../VectorMapJoinLeftSemiLongOperator.java | 5 ++++
      .../VectorMapJoinLeftSemiMultiKeyOperator.java | 5 ++++
      .../VectorMapJoinLeftSemiStringOperator.java | 5 ++++
      ...ectorMapJoinOuterGenerateResultOperator.java | 31 ++++++++++++++++----
      .../mapjoin/VectorMapJoinOuterLongOperator.java | 4 +++
      .../VectorMapJoinOuterMultiKeyOperator.java | 4 +++
      .../VectorMapJoinOuterStringOperator.java | 4 +++
      16 files changed, 103 insertions(+), 8 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    index f9d5736..b215f70 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    @@ -132,6 +132,10 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
        // to output batch scratch columns for the small table portion.
        protected VectorColumnSourceMapping smallTableMapping;

    + // These are the output columns for the small table and the big table outer keys.
    + protected int[] smallTableOutputVectorColumns;
    + protected int[] bigTableOuterKeyOutputVectorColumns;
    +
        // These are the columns in the big and small table that are ByteColumnVector columns.
        // We create data buffers for these columns so we can copy strings into those columns by value.
        protected int[] bigTableByteColumnVectorColumns;
    @@ -415,6 +419,9 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem

          smallTableMapping.finalize();

    + bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns();
    + smallTableOutputVectorColumns = smallTableMapping.getOutputColumns();
    +
      // Which big table and small table columns are ByteColumnVector and need to have their data buffers
      // manually reset for some join result processing?


    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    index 70c8cb1..0f1c7a8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
    @@ -331,7 +331,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
              }
              if (bigTableVectorCopyOuterKeys != null) {
                bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, overflowBatch, 0);
    - for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
                  overflowBatch.cols[column].isRepeating = true;
                }
              }
    @@ -347,7 +347,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
              }

              if (bigTableVectorCopyOuterKeys != null) {
    - for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
                  ColumnVector colVector = overflowBatch.cols[column];
                  colVector.reset();
                }

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
    index 2173829..53a91d8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
    @@ -125,6 +125,11 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig

            batchCounter++;

    + // Do the per-batch setup for an inner big-only join.
    +
    + // (Currently none)
    + // innerBigOnlyPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
    index ab6c17e..9553fa0 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
    @@ -130,6 +130,11 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne

            batchCounter++;

    + // Do the per-batch setup for an inner big-only join.
    +
    + // (Currently none)
    + // innerBigOnlyPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    index 0b725aa..17d0b63 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
    @@ -116,6 +116,11 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB

            batchCounter++;

    + // Do the per-batch setup for an inner big-only join.
    +
    + // (Currently none)
    + // innerBigOnlyPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
    index a7eb454..3a5e4b2 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
    @@ -23,6 +23,7 @@ import java.io.IOException;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.ql.exec.JoinUtil;
    +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
      import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
      import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
      import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
    @@ -127,6 +128,20 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
         */

        /**
    + * Do the per-batch setup for an inner join.
    + */
    + protected void innerPerBatchSetup(VectorizedRowBatch batch) {
    +
    + // For join operators that can generate small table results, reset their
    + // (target) scratch columns.
    +
    + for (int column : smallTableOutputVectorColumns) {
    + ColumnVector smallTableColumn = batch.cols[column];
    + smallTableColumn.reset();
    + }
    + }
    +
    + /**
         * Generate the inner join output results for one vectorized row batch.
         *
         * @param batch
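
    The innerPerBatchSetup method added above is the heart of HIVE-10520: the small-table result columns are scratch columns whose isRepeating/noNulls/isNull state survives from one VectorizedRowBatch to the next, so they must be cleared before each batch is processed. Below is a minimal sketch of that pattern, assuming the hive-exec jar from this branch on the classpath; the sketch class name and the hard-coded column index are hypothetical, while ColumnVector.reset() and the batch/column types are the ones used in the diff.

        import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
        import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
        import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

        public class InnerPerBatchSetupSketch {
          // Hypothetical index; the operator caches these from
          // smallTableMapping.getOutputColumns() as smallTableOutputVectorColumns.
          static final int[] SMALL_TABLE_OUTPUT_COLUMNS = { 1 };

          // Mirrors innerPerBatchSetup(): reset every small-table scratch column.
          static void innerPerBatchSetup(VectorizedRowBatch batch) {
            for (int column : SMALL_TABLE_OUTPUT_COLUMNS) {
              batch.cols[column].reset();
            }
          }

          public static void main(String[] args) {
            VectorizedRowBatch batch = new VectorizedRowBatch(2);
            batch.cols[0] = new LongColumnVector();
            batch.cols[1] = new LongColumnVector();
            ColumnVector scratch = batch.cols[1];
            // Simulate state left behind by a previous batch with no matches:
            scratch.isRepeating = true;
            scratch.noNulls = false;
            scratch.isNull[0] = true;
            innerPerBatchSetup(batch);
            // reset() clears the repeating/null flags, so the new batch starts clean.
            System.out.println("repeating=" + scratch.isRepeating
                + ", noNulls=" + scratch.noNulls);
          }
        }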

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
    index c998252..b77a93c 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
    @@ -124,6 +124,10 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe

            batchCounter++;

    + // Do the per-batch setup for an inner join.
    +
    + innerPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
    index e426476..938506b 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
    @@ -128,6 +128,10 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera

            batchCounter++;

    + // Do the per-batch setup for an inner join.
    +
    + innerPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    index 3bc225a..f7dd8e2 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
    @@ -115,6 +115,10 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate

            batchCounter++;

    + // Do the per-batch setup for an inner join.
    +
    + innerPerBatchSetup(batch);
    +
            // For inner joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
    index dd614da..75aeefb 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
    @@ -125,6 +125,11 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene

            batchCounter++;

    + // Do the per-batch setup for a left semi join.
    +
    + // (Currently none)
    + // leftSemiPerBatchSetup(batch);
    +
            // For left semi joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
    index cf4f312..ea287f4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
    @@ -129,6 +129,11 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi

            batchCounter++;

    + // Do the per-batch setup for a left semi join.
    +
    + // (Currently none)
    + // leftSemiPerBatchSetup(batch);
    +
            // For left semi joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    index 12d663c..116cb81 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
    @@ -116,6 +116,11 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe

            batchCounter++;

    + // Do the per-batch setup for a left semi join.
    +
    + // (Currently none)
    + // leftSemiPerBatchSetup(batch);
    +
            // For left semi joins, we may apply the filter(s) now.
            for(VectorExpression ve : bigTableFilterExpressions) {
              ve.evaluate(batch);

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
    index 3309921..7ef5574 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
    @@ -126,6 +126,25 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
         */

        /**
    + * Do the per-batch setup for an outer join.
    + */
    + protected void outerPerBatchSetup(VectorizedRowBatch batch) {
    +
    + // For join operators that can generate small table results, reset their
    + // (target) scratch columns.
    +
    + for (int column : smallTableOutputVectorColumns) {
    + ColumnVector smallTableColumn = batch.cols[column];
    + smallTableColumn.reset();
    + }
    +
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
    + ColumnVector bigTableOuterKeyColumn = batch.cols[column];
    + bigTableOuterKeyColumn.reset();
    + }
    + }
    +
    + /**
         * Generate the outer join output results for one vectorized row batch.
         *
         * Any filter expressions will apply now since hash map lookup for outer join is complete.
    @@ -413,7 +432,7 @@ public abstract class VectorMapJoinOuterGenerateResultOperator

       // Mark any small table scratch columns that would normally receive a copy of the
             // key as null and repeating.
    - for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
               ColumnVector colVector = batch.cols[column];
               colVector.isRepeating = true;
               colVector.noNulls = false;
    @@ -421,7 +440,7 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
             }

             // Small table values are set to null and repeating.
    - for (int column : smallTableMapping.getOutputColumns()) {
    + for (int column : smallTableOutputVectorColumns) {
               ColumnVector colVector = batch.cols[column];
               colVector.isRepeating = true;
               colVector.noNulls = false;
    @@ -442,14 +461,14 @@ public abstract class VectorMapJoinOuterGenerateResultOperator

         // Mark any small table scratch columns that would normally receive a copy of the
               // key as null, too.
    - for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
                 ColumnVector colVector = batch.cols[column];
                 colVector.noNulls = false;
                 colVector.isNull[batchIndex] = true;
               }

               // Small table values are set to null.
    - for (int column : smallTableMapping.getOutputColumns()) {
    + for (int column : smallTableOutputVectorColumns) {
                 ColumnVector colVector = batch.cols[column];
                 colVector.noNulls = false;
                 colVector.isNull[batchIndex] = true;
    @@ -573,7 +592,7 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
            batch.selectedInUse = true;
          }

    - for (int column : smallTableMapping.getOutputColumns()) {
    + for (int column : smallTableOutputVectorColumns) {
            ColumnVector colVector = batch.cols[column];
            colVector.noNulls = false;
            colVector.isNull[0] = true;
    @@ -582,7 +601,7 @@ public abstract class VectorMapJoinOuterGenerateResultOperator

      // Mark any small table scratch columns that would normally receive a copy of the key
          // as null, too.
    - for (int column : bigTableOuterKeyMapping.getOutputColumns()) {
    + for (int column : bigTableOuterKeyOutputVectorColumns) {
            ColumnVector colVector = batch.cols[column];
            colVector.noNulls = false;
            colVector.isNull[0] = true;
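
    The outer-join setup added in this file has to reset two column sets: the hunks above mark both the small-table scratch columns and the big-table outer-key scratch columns null-and-repeating when a batch has no matches, and without outerPerBatchSetup the next batch would inherit those nulls. A minimal sketch under the same assumptions as the inner-join sketch earlier (hypothetical column indices; ColumnVector/VectorizedRowBatch are the types used in the diff).

        import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
        import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

        public class OuterPerBatchSetupSketch {
          // Hypothetical indices standing in for the cached
          // smallTableOutputVectorColumns / bigTableOuterKeyOutputVectorColumns.
          static final int[] SMALL_TABLE_COLUMNS = { 1 };
          static final int[] OUTER_KEY_COLUMNS = { 2 };

          // Mirrors outerPerBatchSetup(): both column sets start each batch clean.
          static void outerPerBatchSetup(VectorizedRowBatch batch) {
            for (int column : SMALL_TABLE_COLUMNS) {
              batch.cols[column].reset();
            }
            for (int column : OUTER_KEY_COLUMNS) {
              batch.cols[column].reset();
            }
          }

          public static void main(String[] args) {
            VectorizedRowBatch batch = new VectorizedRowBatch(3);
            for (int i = 0; i < 3; i++) {
              batch.cols[i] = new LongColumnVector();
            }
            // A fully non-matched previous batch leaves the outer-key scratch
            // column marked null-and-repeating, as in the diff above:
            batch.cols[2].isRepeating = true;
            batch.cols[2].noNulls = false;
            batch.cols[2].isNull[0] = true;
            outerPerBatchSetup(batch);  // the next batch starts from a clean slate
            System.out.println("outer key repeating=" + batch.cols[2].isRepeating);
          }
        }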

    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
    index 8f18672..37ccf22 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
    @@ -123,6 +123,10 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe

            batchCounter++;

    + // Do the per-batch setup for an outer join.
    +
    + outerPerBatchSetup(batch);
    +
            // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
            // apply the filter before hash table matching.


    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
    index ffee959..23a29f7 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
    @@ -128,6 +128,10 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera

            batchCounter++;

    + // Do the per-batch setup for an outer join.
    +
    + outerPerBatchSetup(batch);
    +
            // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
            // apply the filter before hash table matching.


    http://git-wip-us.apache.org/repos/asf/hive/blob/61176b10/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    index 5167c19..f0af3f6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
    @@ -115,6 +115,10 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate

            batchCounter++;

    + // Do the per-batch setup for an outer join.
    +
    + outerPerBatchSetup(batch);
    +
            // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
            // apply the filter before hash table matching.
  • Prasanthj at May 7, 2015 at 1:20 am
    http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
    new file mode 100644
    index 0000000..4b39b2c
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
    @@ -0,0 +1,1587 @@
    +PREHOOK: query: -- Hybrid Grace Hash Join
    +-- Test basic functionalities:
    +-- 1. Various cases when hash partitions spill
    +-- 2. Partitioned table spilling
    +-- 3. Vectorization
    +
    +SELECT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +POSTHOOK: query: -- Hybrid Grace Hash Join
    +-- Test basic functionalities:
    +-- 1. Various cases when hash partitions spill
    +-- 2. Partitioned table spilling
    +-- 3. Vectorization
    +
    +SELECT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: _dummy_database@_dummy_table
    +#### A masked pattern was here ####
    +1
    +PREHOOK: query: -- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint < 2000000000) (type: boolean)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint < 2000000000) (type: boolean)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3152013
    +PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
    +-- The one in memory will eventually exceed memory limit, but won't spill.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
    +-- The one in memory will eventually exceed memory limit, but won't spill.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint < 2000000000) (type: boolean)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint < 2000000000) (type: boolean)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint
    + where c.cint < 2000000000) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3152013
    +PREHOOK: query: -- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Base result for inner join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3152013
    +PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
    +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
    +-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + inner join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3152013
    +PREHOOK: query: -- Base result for outer join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Base result for outer join
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Left Outer Join0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3155128
    +PREHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
    +explain
    +select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Left Outer Join0 to 1
    + keys:
    + 0 _col0 (type: int)
    + 1 _col0 (type: int)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: c
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: cint (type: int)
    + outputColumnNames: _col0
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: int)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: int)
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select c.ctinyint
    + from alltypesorc c
    + left outer join alltypesorc cd
    + on cd.cint = c.cint) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +#### A masked pattern was here ####
    +3155128
    +PREHOOK: query: -- Partitioned table
    +create table parttbl (key string, value char(20)) partitioned by (dt char(10))
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@parttbl
    +POSTHOOK: query: -- Partitioned table
    +create table parttbl (key string, value char(20)) partitioned by (dt char(10))
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@parttbl
    +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
    + select * from src
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@parttbl@dt=2000-01-01
    +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
    + select * from src
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@parttbl@dt=2000-01-01
    +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
    + select * from src1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src1
    +PREHOOK: Output: default@parttbl@dt=2000-01-02
    +POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
    + select * from src1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src1
    +POSTHOOK: Output: default@parttbl@dt=2000-01-02
    +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
    +POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
    +PREHOOK: query: -- No spill, base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- No spill, base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: p1
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: p2
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@parttbl
    +PREHOOK: Input: default@parttbl@dt=2000-01-01
    +PREHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@parttbl
    +POSTHOOK: Input: default@parttbl@dt=2000-01-01
    +POSTHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +1217
    +PREHOOK: query: -- No spill, 2 partitions created in memory
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- No spill, 2 partitions created in memory
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: p1
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: p2
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@parttbl
    +PREHOOK: Input: default@parttbl@dt=2000-01-01
    +PREHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@parttbl
    +POSTHOOK: Input: default@parttbl@dt=2000-01-01
    +POSTHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +1217
    +PREHOOK: query: -- Spill case base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Spill case base result
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: p1
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: p2
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@parttbl
    +PREHOOK: Input: default@parttbl@dt=2000-01-01
    +PREHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@parttbl
    +POSTHOOK: Input: default@parttbl@dt=2000-01-01
    +POSTHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +1217
    +PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation
    +explain
    +select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: p1
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 key (type: string)
    + 1 key (type: string)
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: count()
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: p2
    + Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: key is not null (type: boolean)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: key (type: string)
    + sort order: +
    + Map-reduce partition columns: key (type: string)
    + Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@parttbl
    +PREHOOK: Input: default@parttbl@dt=2000-01-01
    +PREHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +POSTHOOK: query: select count(*) from
    +(select p1.value
    + from parttbl p1
    + inner join parttbl p2
    + on p1.key = p2.key) t1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@parttbl
    +POSTHOOK: Input: default@parttbl@dt=2000-01-01
    +POSTHOOK: Input: default@parttbl@dt=2000-01-02
    +#### A masked pattern was here ####
    +1217
    +PREHOOK: query: drop table parttbl
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@parttbl
    +PREHOOK: Output: default@parttbl
    +POSTHOOK: query: drop table parttbl
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@parttbl
    +POSTHOOK: Output: default@parttbl
    +PREHOOK: query: -- Test vectorization
    +-- Test case borrowed from vector_decimal_mapjoin.q
    +CREATE TABLE decimal_mapjoin STORED AS ORC AS
    + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
    + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
    + cint
    + FROM alltypesorc
    +PREHOOK: type: CREATETABLE_AS_SELECT
    +PREHOOK: Input: default@alltypesorc
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@decimal_mapjoin
    +POSTHOOK: query: -- Test vectorization
    +-- Test case borrowed from vector_decimal_mapjoin.q
    +CREATE TABLE decimal_mapjoin STORED AS ORC AS
    + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
    + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
    + cint
    + FROM alltypesorc
    +POSTHOOK: type: CREATETABLE_AS_SELECT
    +POSTHOOK: Input: default@alltypesorc
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@decimal_mapjoin
    +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 2 (BROADCAST_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: l
    + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint = 6981) (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 6981 (type: int)
    + 1 6981 (type: int)
    + outputColumnNames: _col1, _col9
    + input vertices:
    + 1 Map 2
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: r
    + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint = 6981) (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: 6981 (type: int)
    + sort order: +
    + Map-reduce partition columns: 6981 (type: int)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + value expressions: cdecimal2 (type: decimal(23,14))
    + Execution mode: vectorized
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@decimal_mapjoin
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@decimal_mapjoin
    +#### A masked pattern was here ####
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 -617.5607769230769
    +6981 6981 5831542.269248378 -617.5607769230769
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 6984454.211097692
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 6984454.211097692
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 6984454.211097692
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 2 (BROADCAST_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: l
    + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint = 6981) (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 6981 (type: int)
    + 1 6981 (type: int)
    + outputColumnNames: _col1, _col9
    + input vertices:
    + 1 Map 2
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Select Operator
    + expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
    + outputColumnNames: _col0, _col1, _col2, _col3
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: r
    + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (cint = 6981) (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: 6981 (type: int)
    + sort order: +
    + Map-reduce partition columns: 6981 (type: int)
    + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
    + value expressions: cdecimal2 (type: decimal(23,14))
    + Execution mode: vectorized
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@decimal_mapjoin
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
    + FROM decimal_mapjoin l
    + JOIN decimal_mapjoin r ON l.cint = r.cint
    + WHERE l.cint = 6981
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@decimal_mapjoin
    +#### A masked pattern was here ####
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 -617.5607769230769
    +6981 6981 5831542.269248378 -617.5607769230769
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 6984454.211097692
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 5831542.269248378 NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL -617.5607769230769
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL 6984454.211097692
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 NULL NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 6984454.211097692
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 -617.5607769230769
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 6984454.211097692
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +6981 6981 -515.621072973 NULL
    +PREHOOK: query: DROP TABLE decimal_mapjoin
    +PREHOOK: type: DROPTABLE
    +PREHOOK: Input: default@decimal_mapjoin
    +PREHOOK: Output: default@decimal_mapjoin
    +POSTHOOK: query: DROP TABLE decimal_mapjoin
    +POSTHOOK: type: DROPTABLE
    +POSTHOOK: Input: default@decimal_mapjoin
    +POSTHOOK: Output: default@decimal_mapjoin
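    The spill and no-spill variants above produce identical plans except for the "HybridGraceHashJoin: true" annotation on the Map Join Operator. A minimal sketch for reproducing that annotation on a Tez build, assuming the hive.mapjoin.hybridgrace.hashtable switch from the hybrid grace hash join work (the memory limits that force the spill cases are set by the test harness and are not shown here):

      SET hive.execution.engine=tez;
      SET hive.auto.convert.join=true;
      -- Assumed switch name; toggles hybrid grace hash join on/off.
      SET hive.mapjoin.hybridgrace.hashtable=true;

      EXPLAIN
      SELECT count(*) FROM
      (SELECT p1.value
       FROM parttbl p1
       INNER JOIN parttbl p2
       ON p1.key = p2.key) t1;

    With the switch off, the same query yields the plan without the annotation, as in the base-result sections above.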
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10549 : CBO (Calcite Return Path): Enable NonBlockingOpDeDupProc (Pengcheng Xiong via Ashutosh Chauhan)

    Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6b471dab
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6b471dab
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6b471dab

    Branch: refs/heads/llap
    Commit: 6b471dab6fa4adea12bd102864611b4c20f1f0f2
    Parents: 61176b1
    Author: Pengcheng Xiong <pxiong@hortonworks.com>
    Authored: Wed Apr 29 22:25:00 2015 -0700
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Fri May 1 11:32:33 2015 -0700

    ----------------------------------------------------------------------
      ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java | 4 +---
      1 file changed, 1 insertion(+), 3 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/6b471dab/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    index 5afd4f2..58f8afe 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
    @@ -149,9 +149,7 @@ public class Optimizer {
          if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
            transformations.add(new ReduceSinkDeDuplication());
          }
    - if(!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
    - transformations.add(new NonBlockingOpDeDupProc());
    - }
    + transformations.add(new NonBlockingOpDeDupProc());
          if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)
              && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
            transformations.add(new IdentityProjectRemover());
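    The hunk above simply drops the guard, so NonBlockingOpDeDupProc (which merges adjacent SEL-SEL and FIL-FIL operator pairs) is now registered even when the CBO return path is active. A sketch for exercising the newly covered configuration, assuming HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP is backed by the hive.cbo.returnpath.hiveop property:

      SET hive.cbo.enable=true;
      -- Assumed property name behind HIVE_CBO_RETPATH_HIVEOP.
      SET hive.cbo.returnpath.hiveop=true;
      EXPLAIN SELECT value FROM src WHERE key > '0';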
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-9908: vectorization error binary type not supported, group by with binary columns (Matt McCline via Jason Dere)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bf41faa
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bf41faa
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bf41faa

    Branch: refs/heads/llap
    Commit: 3bf41faa04284b1f50adcd7da50dbb74664a8396
    Parents: ce736af
    Author: Jason Dere <jdere@hortonworks.com>
    Authored: Mon May 4 21:55:19 2015 -0700
    Committer: Jason Dere <jdere@hortonworks.com>
    Committed: Mon May 4 21:55:19 2015 -0700

    ----------------------------------------------------------------------
      .../test/resources/testconfiguration.properties | 1 +
      .../ql/exec/vector/VectorColumnSetInfo.java | 3 +-
      .../clientpositive/vector_binary_join_groupby.q | 55 ++++
      .../tez/vector_binary_join_groupby.q.out | 303 +++++++++++++++++++
      .../vector_binary_join_groupby.q.out | 293 ++++++++++++++++++
      5 files changed, 654 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
    index b2a6e58..8e9984a 100644
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@ -184,6 +184,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
        update_two_cols.q,\
        vector_aggregate_9.q,\
        vector_between_in.q,\
    + vector_binary_join_groupby.q,\
        vector_bucket.q,\
        vector_cast_constant.q,\
        vector_char_2.q,\

    http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
    index d9c16dc..8c4b6ea 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
    @@ -126,7 +126,8 @@ public class VectorColumnSetInfo {
            doubleIndices[doubleIndicesIndex] = addIndex;
            indexLookup[addIndex].setDouble(doubleIndicesIndex);
            ++doubleIndicesIndex;
    - } else if (VectorizationContext.isStringFamily(outputType)) {
    + } else if (VectorizationContext.isStringFamily(outputType) ||
    + outputType.equalsIgnoreCase("binary")) {
            stringIndices[stringIndicesIndex]= addIndex;
            indexLookup[addIndex].setString(stringIndicesIndex);
            ++stringIndicesIndex;
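    The one-line fix above widens the string-family branch in VectorColumnSetInfo so binary output columns are assigned to the bytes-backed column vectors (stringIndices) rather than falling through with no slot, which is what raised the "binary type not supported" vectorization error. In query terms, a sketch of what the new tests below cover, using the hundredorc table they create:

      SET hive.vectorized.execution.enabled=true;
      -- Previously failed to vectorize with a binary group-by key;
      -- binary now rides the same column-vector path as string.
      SELECT count(*), bin
      FROM hundredorc
      GROUP BY bin;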

    http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
    new file mode 100644
    index 0000000..3bdfd8c
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
    @@ -0,0 +1,55 @@
    +SET hive.auto.convert.join=true;
    +SET hive.auto.convert.join.noconditionaltask=true;
    +SET hive.auto.convert.join.noconditionaltask.size=1000000000;
    +SET hive.vectorized.execution.enabled=true;
    +
    +DROP TABLE over1k;
    +DROP TABLE hundredorc;
    +
    +-- data setup
    +CREATE TABLE over1k(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE;
    +
    +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k;
    +
    +CREATE TABLE hundredorc(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS ORC;
    +
    +INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100;
    +
    +EXPLAIN
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
    +
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin;
    +
    +EXPLAIN
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin;
    +
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin;
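    Note that the second sum(hash(*)) query joins on t2.bin = t2.bin, a predicate over a single alias that holds for every non-null bin, so the map join degenerates into a cross product; the "Map Join MAPJOIN[...] is a cross product" warnings in both golden files below record exactly that. The EXPLAIN above it uses what was presumably the intended predicate:

      SELECT sum(hash(*))
      FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;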

    http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
    new file mode 100644
    index 0000000..8dcd40d
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
    @@ -0,0 +1,303 @@
    +PREHOOK: query: DROP TABLE over1k
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE over1k
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE hundredorc
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE hundredorc
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: -- data setup
    +CREATE TABLE over1k(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@over1k
    +POSTHOOK: query: -- data setup
    +CREATE TABLE over1k(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@over1k
    +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@over1k
    +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@over1k
    +PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@hundredorc
    +POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@hundredorc
    +PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@over1k
    +PREHOOK: Output: default@hundredorc
    +POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@over1k
    +POSTHOOK: Output: default@hundredorc
    +POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
    +POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
    +PREHOOK: query: EXPLAIN
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 3 (BROADCAST_EDGE)
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: t1
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: bin is not null (type: boolean)
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 bin (type: binary)
    + 1 bin (type: binary)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
    + input vertices:
    + 1 Map 3
    + Statistics: Num rows: 55 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Group By Operator
    + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24))
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Map 3
    + Map Operator Tree:
    + TableScan
    + alias: t2
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: bin is not null (type: boolean)
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: bin (type: binary)
    + sort order: +
    + Map-reduce partition columns: bin (type: binary)
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + value expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2))
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Map Join MAPJOIN[13][bigTable=t1] in task 'Map 1' is a cross product
    +PREHOOK: query: SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +-107801098240
    +PREHOOK: query: EXPLAIN
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Reducer 2 <- Map 1 (SIMPLE_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: hundredorc
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: bin (type: binary)
    + outputColumnNames: _col0
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + keys: _col0 (type: binary)
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: binary)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: binary)
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: bigint)
    + Execution mode: vectorized
    + Reducer 2
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + keys: KEY._col0 (type: binary)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: bigint), _col0 (type: binary)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Execution mode: vectorized
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +5 american history
    +5 biology
    +2 chemistry
    +2 debate
    +4 education
    +5 forestry
    +4 geology
    +5 history
    +6 industrial engineering
    +3 joggying
    +5 kindergarten
    +1 linguistics
    +9 mathematics
    +8 nap time
    +1 opthamology
    +2 philosophy
    +5 quiet hour
    +4 religion
    +3 study skills
    +7 topology
    +1 undecided
    +2 values clariffication
    +3 wind surfing
    +3 xylophone band
    +2 yard duty
    +3 zync studies

    http://git-wip-us.apache.org/repos/asf/hive/blob/3bf41faa/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
    new file mode 100644
    index 0000000..c3e4d52
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
    @@ -0,0 +1,293 @@
    +PREHOOK: query: DROP TABLE over1k
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE over1k
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: DROP TABLE hundredorc
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: DROP TABLE hundredorc
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: -- data setup
    +CREATE TABLE over1k(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@over1k
    +POSTHOOK: query: -- data setup
    +CREATE TABLE over1k(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
    +STORED AS TEXTFILE
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@over1k
    +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
    +PREHOOK: type: LOAD
    +#### A masked pattern was here ####
    +PREHOOK: Output: default@over1k
    +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k
    +POSTHOOK: type: LOAD
    +#### A masked pattern was here ####
    +POSTHOOK: Output: default@over1k
    +PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@hundredorc
    +POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
    + si smallint,
    + i int,
    + b bigint,
    + f float,
    + d double,
    + bo boolean,
    + s string,
    + ts timestamp,
    + dec decimal(4,2),
    + bin binary)
    +STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@hundredorc
    +PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@over1k
    +PREHOOK: Output: default@hundredorc
    +POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k LIMIT 100
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@over1k
    +POSTHOOK: Output: default@hundredorc
    +POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
    +POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
    +POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
    +PREHOOK: query: EXPLAIN
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-5 is a root stage
    + Stage-2 depends on stages: Stage-5
    + Stage-0 depends on stages: Stage-2
    +
    +STAGE PLANS:
    + Stage: Stage-5
    + Map Reduce Local Work
    + Alias -> Map Local Tables:
    + t1
    + Fetch Operator
    + limit: -1
    + Alias -> Map Local Operator Tree:
    + t1
    + TableScan
    + alias: t1
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: bin is not null (type: boolean)
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + HashTable Sink Operator
    + keys:
    + 0 bin (type: binary)
    + 1 bin (type: binary)
    +
    + Stage: Stage-2
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: t2
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: bin is not null (type: boolean)
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 bin (type: binary)
    + 1 bin (type: binary)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
    + Statistics: Num rows: 55 Data size: 16300 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21,_col22,_col23,_col24))
    + mode: hash
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + sort order:
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col0 (type: bigint)
    + Local Work:
    + Map Reduce Local Work
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: sum(VALUE._col0)
    + mode: mergepartial
    + outputColumnNames: _col0
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
    +PREHOOK: query: SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT sum(hash(*))
    +FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +-107801098240
    +PREHOOK: query: EXPLAIN
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +PREHOOK: type: QUERY
    +POSTHOOK: query: EXPLAIN
    +SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: hundredorc
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: bin (type: binary)
    + outputColumnNames: _col0
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Group By Operator
    + aggregations: count()
    + keys: _col0 (type: binary)
    + mode: hash
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: _col0 (type: binary)
    + sort order: +
    + Map-reduce partition columns: _col0 (type: binary)
    + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
    + value expressions: _col1 (type: bigint)
    + Execution mode: vectorized
    + Reduce Operator Tree:
    + Group By Operator
    + aggregations: count(VALUE._col0)
    + keys: KEY._col0 (type: binary)
    + mode: mergepartial
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col1 (type: bigint), _col0 (type: binary)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +POSTHOOK: query: SELECT count(*), bin
    +FROM hundredorc
    +GROUP BY bin
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@hundredorc
    +#### A masked pattern was here ####
    +5 american history
    +5 biology
    +2 chemistry
    +2 debate
    +4 education
    +5 forestry
    +4 geology
    +5 history
    +6 industrial engineering
    +3 joggying
    +5 kindergarten
    +1 linguistics
    +9 mathematics
    +8 nap time
    +1 opthamology
    +2 philosophy
    +5 quiet hour
    +4 religion
    +3 study skills
    +7 topology
    +1 undecided
    +2 values clariffication
    +3 wind surfing
    +3 xylophone band
    +2 yard duty
    +3 zync studies
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10546 : genFileSinkPlan should use the generated SEL's RR for the partition col of FS (Pengcheng Xiong via Ashutosh Chauhan)

    Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/50704eec
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/50704eec
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/50704eec

    Branch: refs/heads/llap
    Commit: 50704eec0aaaf865532e2cdbbd9682a86288fc51
    Parents: 59ecdd3
    Author: Pengcheng Xiong <pxiong@hortonworks.com>
    Authored: Wed Apr 29 21:36:00 2015 -0700
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Fri May 1 12:09:17 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 42 +++++++++++++++++---
      1 file changed, 36 insertions(+), 6 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/50704eec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    index 1d2c764..dec0e38 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    @@ -6043,7 +6043,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
          boolean enforceBucketing = false;
          boolean enforceSorting = false;
          ArrayList<ExprNodeDesc> partnCols = new ArrayList<ExprNodeDesc>();
    - ArrayList<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>();
          ArrayList<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
          ArrayList<Integer> sortOrders = new ArrayList<Integer>();
          boolean multiFileSpray = false;
    @@ -6055,11 +6054,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            enforceBucketing = true;
            if (updating() || deleting()) {
              partnCols = getPartitionColsFromBucketColsForUpdateDelete(input, true);
    - partnColsNoConvert = getPartitionColsFromBucketColsForUpdateDelete(input, false);
            } else {
              partnCols = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input, true);
    - partnColsNoConvert = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input,
    - false);
            }
          }

    @@ -6071,7 +6067,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            sortOrders = getSortOrders(dest, qb, dest_tab, input);
            if (!enforceBucketing) {
              partnCols = sortCols;
    - partnColsNoConvert = getSortCols(dest, qb, dest_tab, table_desc, input, false);
            }
          }

    @@ -6107,12 +6102,41 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0));
            ctx.setMultiFileSpray(multiFileSpray);
            ctx.setNumFiles(numFiles);
    - ctx.setPartnCols(partnColsNoConvert);
            ctx.setTotalFiles(totalFiles);
          }
          return input;
        }

    + private void genPartnCols(String dest, Operator input, QB qb,
    + TableDesc table_desc, Table dest_tab, SortBucketRSCtx ctx) throws SemanticException {
    + boolean enforceBucketing = false;
    + boolean enforceSorting = false;
    + ArrayList<ExprNodeDesc> partnColsNoConvert = new ArrayList<ExprNodeDesc>();
    +
    + if ((dest_tab.getNumBuckets() > 0) &&
    + (conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
    + enforceBucketing = true;
    + if (updating() || deleting()) {
    + partnColsNoConvert = getPartitionColsFromBucketColsForUpdateDelete(input, false);
    + } else {
    + partnColsNoConvert = getPartitionColsFromBucketCols(dest, qb, dest_tab, table_desc, input,
    + false);
    + }
    + }
    +
    + if ((dest_tab.getSortCols() != null) &&
    + (dest_tab.getSortCols().size() > 0) &&
    + (conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING))) {
    + enforceSorting = true;
    + if (!enforceBucketing) {
    + partnColsNoConvert = getSortCols(dest, qb, dest_tab, table_desc, input, false);
    + }
    + }
    +
    + if (enforceBucketing || enforceSorting) {
    + ctx.setPartnCols(partnColsNoConvert);
    + }
    + }
        /**
         * Check for HOLD_DDLTIME hint.
         *
    @@ -6556,6 +6580,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
          // If this table is working with ACID semantics, turn off merging
          canBeMerged &= !destTableIsAcid;

    + // Generate the partition columns from the parent input
    + if (dest_type.intValue() == QBMetaData.DEST_TABLE
    + || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
    + genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
    + }
    +
          FileSinkDesc fileSinkDesc = new FileSinkDesc(
            queryTmpdir,
            table_desc,
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10572 - Improve Hive service test to check empty string (Chao, reviewed by Thejas)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/65d63666
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/65d63666
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/65d63666

    Branch: refs/heads/llap
    Commit: 65d63666d361ac8ad845dec5be1523f8ba88ff10
    Parents: 50704ee
    Author: Chao Sun <sunchao@apache.org>
    Authored: Fri May 1 17:12:08 2015 -0700
    Committer: Chao Sun <sunchao@apache.org>
    Committed: Fri May 1 17:12:08 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hive/service/auth/LdapAuthenticationProviderImpl.java | 2 +-
      1 file changed, 1 insertion(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/65d63666/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
    index 854d078..4e2ef90 100644
    --- a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
    +++ b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java
    @@ -53,7 +53,7 @@ public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvi
            user = user + "@" + ldapDomain;
          }

    - if (password == null || password.isEmpty()) {
    + if (password == null || password.isEmpty() || password.getBytes()[0] == 0) {
            throw new AuthenticationException("Error validating LDAP user:" +
                " a null or blank password has been provided");
          }
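
    The added byte-level guard matters because, per RFC 4513, many LDAP servers treat a bind with an empty (or effectively empty) password as an "unauthenticated bind" that succeeds without verifying credentials, and a password whose first byte is NUL can be truncated to empty by C-style string handling further down the stack. A minimal standalone sketch of the same check; the PasswordGuard class and validateClientPassword method are illustrative names, not part of the Hive sources above:

      import javax.security.sasl.AuthenticationException;

      public final class PasswordGuard {
        private PasswordGuard() {}

        // Reject passwords that could trigger an LDAP unauthenticated bind:
        // null, empty, or starting with a NUL byte (mirrors the HIVE-10572 check).
        public static void validateClientPassword(String password) throws AuthenticationException {
          if (password == null || password.isEmpty() || password.getBytes()[0] == 0) {
            throw new AuthenticationException(
                "Error validating LDAP user: a null or blank password has been provided");
          }
        }

        public static void main(String[] args) throws AuthenticationException {
          validateClientPassword("s3cret");     // passes
          validateClientPassword("\0trailing"); // throws: first byte is NUL
        }
      }
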
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-9736 : StorageBasedAuthProvider should batch namenode-calls where possible (Mithun Radhakrishnan, reviewed by Chris Nauroth, Sushanth Sowmyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/19886150
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/19886150
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/19886150

    Branch: refs/heads/llap
    Commit: 19886150121b6081127bf1e581b24d8dcc12f1df
    Parents: 3f72f81
    Author: Sushanth Sowmyan <khorgath@gmail.com>
    Authored: Tue May 5 08:56:27 2015 -0700
    Committer: Sushanth Sowmyan <khorgath@gmail.com>
    Committed: Tue May 5 08:58:35 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/common/FileUtils.java | 155 +++++++++++++------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 7 +
      .../StorageBasedAuthorizationProvider.java | 114 +++++++++++++-
      .../apache/hadoop/hive/shims/Hadoop23Shims.java | 29 +++-
      .../org/apache/hadoop/fs/DefaultFileAccess.java | 65 +++++---
      .../apache/hadoop/hive/shims/HadoopShims.java | 24 ++-
      .../hadoop/hive/shims/HadoopShimsSecure.java | 8 +
      7 files changed, 318 insertions(+), 84 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    index c2c54bc..536fe11 100644
    --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
    @@ -25,12 +25,16 @@ import java.net.URISyntaxException;
      import java.security.AccessControlException;
      import java.security.PrivilegedExceptionAction;
      import java.util.BitSet;
    +import java.util.EnumSet;
    +import java.util.Iterator;
      import java.util.List;

    +import com.google.common.base.Function;
    +import com.google.common.collect.Iterators;
    +import com.google.common.collect.Lists;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.conf.Configuration;
    -import org.apache.hadoop.fs.DefaultFileAccess;
      import org.apache.hadoop.fs.FileStatus;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.FileUtil;
    @@ -369,26 +373,54 @@ public final class FileUtils {
        public static void checkFileAccessWithImpersonation(final FileSystem fs,
            final FileStatus stat, final FsAction action, final String user)
                throws IOException, AccessControlException, InterruptedException, Exception {
    + checkFileAccessWithImpersonation(fs,
    + Iterators.singletonIterator(stat),
    + EnumSet.of(action),
    + user);
    + }
    +
    + /**
    + * Perform a check to determine if the user is able to access the files passed in.
    + * If the user name passed in is different from the current user, this method will
    + * attempt to impersonate the user to do the check; the current user should be
    + * able to create proxy users in this case.
    + * @param fs FileSystem of the path to check
    + * @param statuses FileStatus instances representing the files being checked
    + * @param actions The FsActions that will be checked
    + * @param user User name of the user that will be checked for access. If the user name
    + * is null or the same as the current user, no user impersonation will be done
    + * and the check will be done as the current user. Otherwise the file access
    + * check will be performed within a doAs() block to use the access privileges
    + * of this user. In this case the user must be configured to impersonate other
    + * users, otherwise this check will fail with an error.
    + * @throws IOException
    + * @throws AccessControlException
    + * @throws InterruptedException
    + * @throws Exception
    + */
    + public static void checkFileAccessWithImpersonation(final FileSystem fs,
    + final Iterator<FileStatus> statuses, final EnumSet<FsAction> actions, final String user)
    + throws IOException, AccessControlException, InterruptedException, Exception {
          UserGroupInformation ugi = Utils.getUGI();
          String currentUser = ugi.getShortUserName();

          if (user == null || currentUser.equals(user)) {
            // No need to impersonate user, do the checks as the currently configured user.
    - ShimLoader.getHadoopShims().checkFileAccess(fs, stat, action);
    - return;
    + ShimLoader.getHadoopShims().checkFileAccess(fs, statuses, actions);
    + }
    + else {
    + // Otherwise, try user impersonation. Current user must be configured to do user impersonation.
    + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(
    + user, UserGroupInformation.getLoginUser());
    + proxyUser.doAs(new PrivilegedExceptionAction<Object>() {
    + @Override
    + public Object run() throws Exception {
    + FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
    + ShimLoader.getHadoopShims().checkFileAccess(fsAsUser, statuses, actions);
    + return null;
    + }
    + });
          }
    -
    - // Otherwise, try user impersonation. Current user must be configured to do user impersonation.
    - UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(
    - user, UserGroupInformation.getLoginUser());
    - proxyUser.doAs(new PrivilegedExceptionAction<Object>() {
    - @Override
    - public Object run() throws Exception {
    - FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
    - ShimLoader.getHadoopShims().checkFileAccess(fsAsUser, stat, action);
    - return null;
    - }
    - });
        }

        /**
    @@ -677,70 +709,91 @@ public final class FileUtils {
         * @param path
         * @param conf
         * @param user
    - * @throws AccessControlException
    - * @throws InterruptedException
         * @throws Exception
         */
    - public static void checkDeletePermission(Path path, Configuration conf, String user)
    - throws AccessControlException, InterruptedException, Exception {
    - // This requires ability to delete the given path.
    - // The following 2 conditions should be satisfied for this-
    - // 1. Write permissions on parent dir
    - // 2. If sticky bit is set on parent dir then one of following should be
    - // true
    - // a. User is owner of the current dir/file
    - // b. User is owner of the parent dir
    - // Super users are also allowed to drop the file, but there is no good way of checking
    - // if a user is a super user. Also super users running hive queries is not a common
    - // use case. super users can also do a chown to be able to drop the file
    + public static void checkDeletePermission(Path path, Configuration conf, String user) throws Exception {

          if(path == null) {
            // no file/dir to be deleted
            return;
          }

    - final FileSystem fs = path.getFileSystem(conf);
          // check user has write permissions on the parent dir
    + final FileSystem fs = path.getFileSystem(conf);
          FileStatus stat = null;
          try {
            stat = fs.getFileStatus(path);
          } catch (FileNotFoundException e) {
            // ignore
          }
    +
          if (stat == null) {
            // no file/dir to be deleted
            return;
          }
    - FileUtils.checkFileAccessWithImpersonation(fs, stat, FsAction.WRITE, user);
    +
    + checkDeletePermission(fs, Lists.newArrayList(stat), conf, user);
    + }
    +
    + /**
    + * Checks whether delete can be performed on the given paths by the given user.
    + * If a file does not exist, this method just returns without throwing an exception.
    + * @param fs The FileSystem instance
    + * @param fileStatuses The FileStatus instances for the paths being checked.
    + * @param conf Configuration, corresponding to the FileSystem.
    + * @param user The user, whose permission is to be checked.
    + * @throws Exception
    + */
    + public static void checkDeletePermission(FileSystem fs, Iterable<FileStatus> fileStatuses,
    + Configuration conf, String user) throws Exception {
    +
    + // This requires ability to delete the given path.
    + // The following 2 conditions should be satisfied for this-
    + // 1. Write permissions on parent dir
    + // 2. If sticky bit is set on parent dir then one of following should be
    + // true
    + // a. User is owner of the current dir/file
    + // b. User is owner of the parent dir
    + FileUtils.checkFileAccessWithImpersonation(fs, fileStatuses.iterator(), EnumSet.of(FsAction.WRITE), user);

          HadoopShims shims = ShimLoader.getHadoopShims();
          if (!shims.supportStickyBit()) {
    - // not supports sticky bit
    + // No support for sticky-bit.
            return;
          }

    - // check if sticky bit is set on the parent dir
    - FileStatus parStatus = fs.getFileStatus(path.getParent());
    - if (!shims.hasStickyBit(parStatus.getPermission())) {
    - // no sticky bit, so write permission on parent dir is sufficient
    - // no further checks needed
    - return;
    - }
    + List<Path> allParentPaths =
    + Lists.newArrayList(
    + Iterators.transform(fileStatuses.iterator(), new Function<FileStatus, Path>() {
    + @Override
    + public Path apply(FileStatus input) {
    + return input.getPath().getParent();
    + }
    + })
    + );
    +
    + Iterator<FileStatus> childStatusIterator = fileStatuses.iterator();
    + for (List<Path> parentPaths : Lists.partition(allParentPaths, getListStatusBatchSize(conf))) {
    + for (FileStatus parentFileStatus : fs.listStatus(parentPaths.toArray(new Path[parentPaths.size()]))) {
    + assert childStatusIterator.hasNext() : "Number of parent-file-statuses doesn't match children.";
    + FileStatus childFileStatus = childStatusIterator.next();
    + // Check sticky-bits on parent-dirs.
    + if (shims.hasStickyBit(parentFileStatus.getPermission())
    + && !parentFileStatus.getOwner().equals(user)
    + && !childFileStatus.getOwner().equals(user)) {
    + throw new IOException(String.format("Permission Denied: User %s can't delete %s because sticky bit is"
    + + " set on the parent dir and user does not own this file or its parent", user, childFileStatus.getPath()));
    + }
    + } // for_each( parent_path );
    + } // for_each( batch_of_parentPaths );

    - // check if user is owner of parent dir
    - if (parStatus.getOwner().equals(user)) {
    - return;
    - }
    + assert !childStatusIterator.hasNext() : "Did not process all file-statuses.";

    - // check if user is owner of current dir/file
    - FileStatus childStatus = fs.getFileStatus(path);
    - if (childStatus.getOwner().equals(user)) {
    - return;
    - }
    - String msg = String.format("Permission Denied: User %s can't delete %s because sticky bit is"
    - + " set on the parent dir and user does not own this file or its parent", user, path);
    - throw new IOException(msg);
    + } // static void checkDeletePermission();

    + private static int getListStatusBatchSize(Configuration configuration) {
    + return HiveConf.getIntVar(configuration,
    + HiveConf.ConfVars.HIVE_AUTHORIZATION_HDFS_LIST_STATUS_BATCH_SIZE);
        }

        /**
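
    The core of the batching above is Guava's Lists.partition() combined with the multi-path FileSystem.listStatus(Path[]) overload: parent paths are listed in configurable chunks instead of one namenode call per file. A self-contained sketch of the partitioning pattern, with an illustrative hard-coded batch size where Hive would read hive.authprovider.hdfs.liststatus.batch.size:

      import java.util.Arrays;
      import java.util.List;

      import com.google.common.collect.Lists;

      public final class BatchingDemo {
        private static final int BATCH_SIZE = 3; // illustrative; Hive reads this from HiveConf

        public static void main(String[] args) {
          List<String> parentPaths = Arrays.asList(
              "/warehouse/t/p=1", "/warehouse/t/p=2", "/warehouse/t/p=3",
              "/warehouse/t/p=4", "/warehouse/t/p=5", "/warehouse/t/p=6",
              "/warehouse/t/p=7");

          // Lists.partition returns consecutive sublists; the last one may be shorter.
          for (List<String> batch : Lists.partition(parentPaths, BATCH_SIZE)) {
            // In checkDeletePermission, this is where a single fs.listStatus(...)
            // call covers the whole chunk of parent directories.
            System.out.println("one listStatus call for: " + batch);
          }
        }
      }
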

    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index d208b88..f04ce82 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -1623,6 +1623,13 @@ public class HiveConf extends Configuration {
              "of updating the original list means that you can append to the defaults\n" +
              "set by SQL standard authorization instead of replacing it entirely."),

    + HIVE_AUTHORIZATION_HDFS_LIST_STATUS_BATCH_SIZE(
    + "hive.authprovider.hdfs.liststatus.batch.size", 1000,
    + "Number of FileStatus objects to be queried for when listing files, for HDFS-based authorization.\n" +
    + "Note: If this exceeds dfs.ls.limit (as set in hdfs-site.xml), DFSClient might use the smaller value as \n" +
    + "the batch-size, internally."
    + ),
    +
          HIVE_CLI_PRINT_HEADER("hive.cli.print.header", false, "Whether to print the names of the columns in query output."),

          HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false,

    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
    index 8f81ef9..6a5c510 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
    @@ -18,15 +18,20 @@

      package org.apache.hadoop.hive.ql.security.authorization;

    -import java.io.FileNotFoundException;
      import java.io.IOException;
      import java.security.AccessControlException;
      import java.util.ArrayList;
    +import java.util.Arrays;
      import java.util.EnumSet;
    +import java.util.Iterator;
      import java.util.List;

      import javax.security.auth.login.LoginException;

    +import com.google.common.base.Function;
    +import com.google.common.base.Predicate;
    +import com.google.common.collect.Iterators;
    +import com.google.common.collect.Lists;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.conf.Configuration;
    @@ -35,6 +40,7 @@ import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.permission.FsAction;
      import org.apache.hadoop.hive.common.FileUtils;
    +import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
      import org.apache.hadoop.hive.metastore.TableType;
      import org.apache.hadoop.hive.metastore.Warehouse;
    @@ -63,7 +69,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
       * out to the parent directory recursively to determine its permissions till
       * it finds a parent that does exist.
       */
    -public class StorageBasedAuthorizationProvider extends HiveAuthorizationProviderBase
    +public class StorageBasedAuthorizationProvider extends HiveMultiPartitionAuthorizationProviderBase
          implements HiveMetastoreAuthorizationProvider {

        private Warehouse wh;
    @@ -242,6 +248,89 @@ public class StorageBasedAuthorizationProvider extends HiveAuthorizationProvider
          }
        }

    + @Override
    + public void authorize(Table table, Iterable<Partition> partitions,
    + Privilege[] requiredReadPrivileges, Privilege[] requiredWritePrivileges)
    + throws HiveException, AuthorizationException {
    +
    + try {
    + class MustCheckTablePermissions { // For closure.
    + public boolean value = false;
    + }
    +
    + final MustCheckTablePermissions mustCheckTablePermissions = new MustCheckTablePermissions();
    + final FileSystem fs = table.getDataLocation().getFileSystem(getConf());
    +
    + // Get partition paths. Filter out null-partitions, and partitions without data-locations.
    + Iterator<Partition> nonNullPartitions
    + = Iterators.filter(partitions.iterator(), new Predicate<Partition>() {
    + @Override
    + public boolean apply(Partition partition) {
    + try {
    + boolean isValidPartitionPath = partition != null
    + && partition.getDataLocation() != null
    + && fs.exists(partition.getDataLocation());
    + mustCheckTablePermissions.value |= isValidPartitionPath;
    + return isValidPartitionPath;
    + }
    + catch (IOException exception){
    + throw new RuntimeException("Could not find location for partition: " + partition, exception);
    + }
    + }
    + });
    +
    + if (mustCheckTablePermissions.value) {
    + // At least one partition was null, or had a non-existent path. So check table-permissions, once.
    + // Partition path can be null in the case of a new create partition - in this case,
    + // we try to default to checking the permissions of the parent table.
    + // Partition itself can also be null, in cases where this gets called as a generic
    + // catch-all call in cases like those with CTAS onto an unpartitioned table (see HIVE-1887)
    +
    + // this should be the case only if this is a create partition.
    + // The privilege needed on the table should be ALTER_DATA, and not CREATE
    + authorize(table, new Privilege[]{}, new Privilege[]{Privilege.ALTER_DATA});
    + }
    +
    +
    + // authorize drops if there was a drop privilege requirement
    + // extract drop privileges
    + DropPrivilegeExtractor privExtractor = new DropPrivilegeExtractor(requiredReadPrivileges, requiredWritePrivileges);
    + requiredReadPrivileges = privExtractor.getReadReqPriv();
    + requiredWritePrivileges = privExtractor.getWriteReqPriv();
    + EnumSet<FsAction> actions = getFsActions(requiredReadPrivileges);
    + actions.addAll(getFsActions(requiredWritePrivileges));
    +
    + ArrayList<Path> allPartitionPaths
    + = Lists.newArrayList(Iterators.transform(nonNullPartitions, new Function<Partition, Path>() {
    + @Override
    + public Path apply(Partition input) {
    + return input.getDataLocation();
    + }
    + }));
    +
    + for (List<Path> partitionPaths : Lists.partition(allPartitionPaths, getListStatusBatchSize(getConf()))) {
    +
    + List<FileStatus> fileStatuses = Arrays.asList(
    + fs.listStatus(partitionPaths.toArray(new Path[partitionPaths.size()])));
    +
    + if (privExtractor.hasDropPrivilege) {
    + FileUtils.checkDeletePermission(fs, fileStatuses, getConf(), authenticator.getUserName());
    + }
    +
    + checkPermissions(fs, fileStatuses.iterator(), actions, authenticator.getUserName());
    + }
    +
    + }
    + catch (Exception exception) {
    + throw hiveException(exception);
    + }
    + }
    +
    + private static int getListStatusBatchSize(Configuration configuration) {
    + return HiveConf.getIntVar(configuration,
    + HiveConf.ConfVars.HIVE_AUTHORIZATION_HDFS_LIST_STATUS_BATCH_SIZE);
    + }
    +
        private void checkDeletePermission(Path dataLocation, Configuration conf, String userName)
            throws HiveException {
          try {
    @@ -388,17 +477,28 @@ public class StorageBasedAuthorizationProvider extends HiveAuthorizationProvider
        protected static void checkPermissions(final FileSystem fs, final FileStatus stat,
            final EnumSet<FsAction> actions, String user) throws IOException,
            AccessControlException, HiveException {
    + checkPermissions(fs, Iterators.singletonIterator(stat), actions, user);
    + }
    +
    + @SuppressWarnings("deprecation")
    + protected static void checkPermissions(final FileSystem fs, Iterator<FileStatus> fileStatuses,
    + final EnumSet<FsAction> actions, String user)
    + throws IOException, AccessControlException, HiveException {

    - if (stat == null) {
    - // File named by path doesn't exist; nothing to validate.
    - return;
    - }
          FsAction checkActions = FsAction.NONE;
          for (FsAction action : actions) {
            checkActions = checkActions.or(action);
          }
    +
    + Iterator<FileStatus> nonNullFileStatuses = Iterators.filter(fileStatuses, new Predicate<FileStatus>() {
    + @Override
    + public boolean apply(FileStatus fileStatus) {
    + return fileStatus != null;
    + }
    + });
    +
          try {
    - FileUtils.checkFileAccessWithImpersonation(fs, stat, checkActions, user);
    + FileUtils.checkFileAccessWithImpersonation(fs, nonNullFileStatuses, EnumSet.of(checkActions), user);
          } catch (Exception err) {
            // fs.permission.AccessControlException removed by HADOOP-11356, but Hive users on older
            // Hadoop versions may still see this exception .. have to reference by name.
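
    The MustCheckTablePermissions local class above is the pre-Java-8 idiom for mutating state from inside an anonymous inner class: captured locals must be final, but a final reference to a mutable holder works. One property worth keeping in mind is that Iterators.filter() is lazy, so the predicate (and therefore the flag update) only runs as the filtered iterator is consumed. A minimal sketch of both points, using illustrative names:

      import java.util.Arrays;
      import java.util.Iterator;

      import com.google.common.base.Predicate;
      import com.google.common.collect.Iterators;

      public final class HolderDemo {
        public static void main(String[] args) {
          class SeenPositive { boolean value = false; } // mutable holder for the closure
          final SeenPositive seen = new SeenPositive();

          Iterator<Integer> positives = Iterators.filter(
              Arrays.asList(-1, 2, -3).iterator(),
              new Predicate<Integer>() {
                @Override
                public boolean apply(Integer n) {
                  boolean ok = n > 0;
                  seen.value |= ok; // side effect runs only when the iterator is pulled
                  return ok;
                }
              });

          System.out.println(seen.value); // false: filter is lazy, nothing consumed yet
          while (positives.hasNext()) {
            positives.next();
          }
          System.out.println(seen.value); // true: predicate ran during consumption
        }
      }
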

    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    ----------------------------------------------------------------------
    diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    index d349068..4547baa 100644
    --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    @@ -29,11 +29,11 @@ import java.security.AccessControlException;
      import java.security.NoSuchAlgorithmException;
      import java.util.ArrayList;
      import java.util.Comparator;
    +import java.util.EnumSet;
      import java.util.HashMap;
      import java.util.Iterator;
      import java.util.List;
      import java.util.Map;
    -import java.util.Set;
      import java.util.TreeMap;

      import org.apache.commons.lang.StringUtils;
    @@ -986,6 +986,33 @@ public class Hadoop23Shims extends HadoopShimsSecure {
          }
        }

    + @Override
    + public void checkFileAccess(FileSystem fs, Iterator<FileStatus> statuses, EnumSet<FsAction> actions)
    + throws IOException, AccessControlException, Exception {
    + try {
    + if (accessMethod == null) {
    + // Have to rely on Hive implementation of filesystem permission checks.
    + DefaultFileAccess.checkFileAccess(fs, statuses, actions);
    + }
    + else {
    + while (statuses.hasNext()) {
    + accessMethod.invoke(fs, statuses.next(), combine(actions));
    + }
    + }
    +
    + } catch (Exception err) {
    + throw wrapAccessException(err);
    + }
    + }
    +
    + private static FsAction combine(EnumSet<FsAction> actions) {
    + FsAction resultantAction = FsAction.NONE;
    + for (FsAction action : actions) {
    + resultantAction = resultantAction.or(action);
    + }
    + return resultantAction;
    + }
    +
        /**
         * If there is an AccessException buried somewhere in the chain of failures, wrap the original
     * exception in an AccessException. Otherwise just return the original exception.

    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
    ----------------------------------------------------------------------
    diff --git a/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java b/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
    index 45ca210..c4261cb 100644
    --- a/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
    +++ b/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java
    @@ -18,23 +18,22 @@

      package org.apache.hadoop.fs;

    -import java.io.FileNotFoundException;
      import java.io.IOException;
      import java.security.AccessControlException;
    -import java.util.ArrayList;
      import java.util.Arrays;
    +import java.util.Collections;
      import java.util.EnumSet;
    +import java.util.Iterator;
      import java.util.List;

      import javax.security.auth.login.LoginException;

    +import com.google.common.collect.Iterators;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.conf.Configuration;
    -import org.apache.hadoop.fs.FileStatus;
      import org.apache.hadoop.fs.permission.FsAction;
      import org.apache.hadoop.fs.permission.FsPermission;
    -import org.apache.hadoop.hive.shims.ShimLoader;
      import org.apache.hadoop.hive.shims.Utils;
      import org.apache.hadoop.security.UserGroupInformation;

    @@ -47,7 +46,7 @@ public class DefaultFileAccess {

        private static Log LOG = LogFactory.getLog(DefaultFileAccess.class);

    - private static List<String> emptyGroups = new ArrayList<String>(0);
    + private static List<String> emptyGroups = Collections.emptyList();

        public static void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action)
            throws IOException, AccessControlException, LoginException {
    @@ -60,34 +59,62 @@ public class DefaultFileAccess {

        public static void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action,
            String user, List<String> groups) throws IOException, AccessControlException {
    + checkFileAccess(fs, Iterators.singletonIterator(stat), EnumSet.of(action), user, groups);
    + }
    +
    + public static void checkFileAccess(FileSystem fs, Iterator<FileStatus> statuses, EnumSet<FsAction> actions,
    + String user, List<String> groups)
    + throws IOException, AccessControlException {

          if (groups == null) {
            groups = emptyGroups;
          }

    + // Short-circuit for super-users.
          String superGroupName = getSuperGroupName(fs.getConf());
          if (userBelongsToSuperGroup(superGroupName, groups)) {
            LOG.info("User \"" + user + "\" belongs to super-group \"" + superGroupName + "\". " +
    - "Permission granted for action: " + action + ".");
    + "Permission granted for actions: " + actions + ".");
            return;
          }

    - final FsPermission dirPerms = stat.getPermission();
    - final String grp = stat.getGroup();
    + while (statuses.hasNext()) {

    - if (user.equals(stat.getOwner())) {
    - if (dirPerms.getUserAction().implies(action)) {
    - return;
    - }
    - } else if (groups.contains(grp)) {
    - if (dirPerms.getGroupAction().implies(action)) {
    - return;
    + FileStatus stat = statuses.next();
    + final FsPermission dirPerms = stat.getPermission();
    + final String grp = stat.getGroup();
    +
    + FsAction combinedAction = combine(actions);
    + if (user.equals(stat.getOwner())) {
    + if (dirPerms.getUserAction().implies(combinedAction)) {
    + continue;
    + }
    + } else if (groups.contains(grp)) {
    + if (dirPerms.getGroupAction().implies(combinedAction)) {
    + continue;
    + }
    + } else if (dirPerms.getOtherAction().implies(combinedAction)) {
    + continue;
            }
    - } else if (dirPerms.getOtherAction().implies(action)) {
    - return;
    +
    + throw new AccessControlException("action " + combinedAction + " not permitted on path "
    + + stat.getPath() + " for user " + user);
    +
    + } // for_each(fileStatus);
    + }
    +
    + private static FsAction combine(EnumSet<FsAction> actions) {
    + FsAction resultantAction = FsAction.NONE;
    + for (FsAction action : actions) {
    + resultantAction = resultantAction.or(action);
          }
    - throw new AccessControlException("action " + action + " not permitted on path "
    - + stat.getPath() + " for user " + user);
    + return resultantAction;
    + }
    +
    + public static void checkFileAccess(FileSystem fs, Iterator<FileStatus> statuses, EnumSet<FsAction> actions)
    + throws IOException, AccessControlException, LoginException {
    + UserGroupInformation ugi = Utils.getUGI();
    + checkFileAccess(fs, statuses, actions, ugi.getShortUserName(), Arrays.asList(ugi.getGroupNames()));
        }

        private static String getSuperGroupName(Configuration configuration) {

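
    Both the shim and DefaultFileAccess fold the requested EnumSet<FsAction> down to one action before checking: FsAction.or() returns the union of the permission bits, so a single access check covers every requested action. A small sketch of that fold (assumes hadoop-common on the classpath):

      import java.util.EnumSet;

      import org.apache.hadoop.fs.permission.FsAction;

      public final class CombineDemo {
        static FsAction combine(EnumSet<FsAction> actions) {
          FsAction result = FsAction.NONE;
          for (FsAction action : actions) {
            result = result.or(action); // union of permission bits
          }
          return result;
        }

        public static void main(String[] args) {
          // READ combined with EXECUTE yields READ_EXECUTE.
          System.out.println(combine(EnumSet.of(FsAction.READ, FsAction.EXECUTE)));
        }
      }
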
    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    ----------------------------------------------------------------------
    diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    index 5a6bc44..4b79d95 100644
    --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
    @@ -24,19 +24,17 @@ import java.net.URI;
      import java.nio.ByteBuffer;
      import java.security.AccessControlException;
      import java.security.NoSuchAlgorithmException;
    -import java.security.PrivilegedExceptionAction;
      import java.util.Comparator;
    +import java.util.EnumSet;
    +import java.util.Iterator;
      import java.util.List;
      import java.util.Map;
    -import java.util.Set;
      import java.util.TreeMap;

    -import javax.security.auth.login.LoginException;

      import com.google.common.annotations.VisibleForTesting;

    -import org.apache.commons.logging.Log;
    -import org.apache.commons.logging.LogFactory;
    +import com.google.common.annotations.VisibleForTesting;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.BlockLocation;
      import org.apache.hadoop.fs.FSDataInputStream;
    @@ -47,7 +45,6 @@ import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.PathFilter;
      import org.apache.hadoop.fs.permission.FsAction;
      import org.apache.hadoop.fs.permission.FsPermission;
    -import org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue;
      import org.apache.hadoop.io.LongWritable;
      import org.apache.hadoop.mapred.ClusterStatus;
      import org.apache.hadoop.mapred.JobConf;
    @@ -530,6 +527,21 @@ public interface HadoopShims {
            throws IOException, AccessControlException, Exception;

        /**
    + * Check if the configured UGI has access to the path for the given file system action.
    + * Method will return successfully if action is permitted. AccessControlException will
    + * be thrown if user does not have access to perform the action. Other exceptions may
    + * be thrown for non-access related errors.
    + * @param fs The FileSystem instance
    + * @param statuses The FileStatuses for the paths being checked
    + * @param actions The FsActions being checked
    + * @throws IOException
    + * @throws AccessControlException
    + * @throws Exception
    + */
    + public void checkFileAccess(FileSystem fs, Iterator<FileStatus> statuses, EnumSet<FsAction> actions)
    + throws Exception;
    +
    + /**
         * Use password API (if available) to fetch credentials/password
         * @param conf
         * @param name

    http://git-wip-us.apache.org/repos/asf/hive/blob/19886150/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    ----------------------------------------------------------------------
    diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    index 89d7798..8e51c02 100644
    --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
    @@ -25,7 +25,9 @@ import java.net.URI;
      import java.security.AccessControlException;
      import java.util.ArrayList;
      import java.util.Collections;
    +import java.util.EnumSet;
      import java.util.HashSet;
    +import java.util.Iterator;
      import java.util.Set;

      import org.apache.commons.lang.ArrayUtils;
    @@ -391,5 +393,11 @@ public abstract class HadoopShimsSecure implements HadoopShims {
        }

        @Override
    + public void checkFileAccess(FileSystem fs, Iterator<FileStatus> statuses, EnumSet<FsAction> action)
    + throws IOException, AccessControlException, Exception {
    + DefaultFileAccess.checkFileAccess(fs, statuses, action);
    + }
    +
    + @Override
        abstract public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException;
      }
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10456: Grace Hash Join should not load spilled partitions on abort (Prasanth Jayachandran reviewed by Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/07fcb098
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/07fcb098
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/07fcb098

    Branch: refs/heads/llap
    Commit: 07fcb098b63003cf74718351269c79870100b8de
    Parents: 77b7fc3
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Sat May 2 17:40:01 2015 -0700
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Sat May 2 17:40:01 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/exec/MapJoinOperator.java | 136 +++++++++++--------
      .../apache/hadoop/hive/ql/exec/ObjectCache.java | 7 +
      .../hadoop/hive/ql/exec/mr/ObjectCache.java | 5 +
      .../persistence/HybridHashTableContainer.java | 35 ++++-
      .../hive/ql/exec/tez/HashTableLoader.java | 5 -
      .../hadoop/hive/ql/exec/tez/ObjectCache.java | 6 +
      .../mapjoin/VectorMapJoinRowBytesContainer.java | 2 +-
      7 files changed, 131 insertions(+), 65 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    index f2b800a..1cfc411 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    @@ -284,7 +284,17 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

          perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
          loader.init(mapContext, mrContext, hconf, this);
    - loader.load(mapJoinTables, mapJoinTableSerdes);
    + try {
    + loader.load(mapJoinTables, mapJoinTableSerdes);
    + } catch (HiveException e) {
    + if (isLogInfoEnabled) {
    + LOG.info("Exception loading hash tables. Clearing partially loaded hash table containers.");
    + }
    +
    + // there could be some spilled partitions which need to be cleaned up
    + clearAllTableContainers();
    + throw e;
    + }

          hashTblInitedOnce = true;

    @@ -433,7 +443,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
        @Override
        public void closeOp(boolean abort) throws HiveException {
          boolean spilled = false;
    - for (MapJoinTableContainer container: mapJoinTables) {
    + for (MapJoinTableContainer container : mapJoinTables) {
            if (container != null) {
              spilled = spilled || container.hasSpill();
              container.dumpMetrics();
    @@ -442,79 +452,93 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem

          // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
          if (spilled) {
    - if (hashMapRowGetters == null) {
    - hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
    - }
    - int numPartitions = 0;
    - // Find out number of partitions for each small table (should be same across tables)
    - for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    - if (pos != conf.getPosBigTable()) {
    - firstSmallTable = (HybridHashTableContainer)mapJoinTables[pos];
    - numPartitions = firstSmallTable.getHashPartitions().length;
    - break;
    + if (!abort) {
    + if (hashMapRowGetters == null) {
    + hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
              }
    - }
    - assert numPartitions != 0 : "Number of partitions must be greater than 0!";
    -
    - if (firstSmallTable.hasSpill()) {
    - spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
    - hybridMapJoinLeftover = true;
    -
    - // Clear all in-memory partitions first
    + int numPartitions = 0;
    + // Find out number of partitions for each small table (should be same across tables)
              for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    - MapJoinTableContainer tableContainer = mapJoinTables[pos];
    - if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
    - HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
    - hybridHtContainer.dumpStats();
    -
    - HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
    - // Clear all in memory partitions first
    - for (int i = 0; i < hashPartitions.length; i++) {
    - if (!hashPartitions[i].isHashMapOnDisk()) {
    - hybridHtContainer.setTotalInMemRowCount(
    - hybridHtContainer.getTotalInMemRowCount() -
    - hashPartitions[i].getHashMapFromMemory().getNumValues());
    - hashPartitions[i].getHashMapFromMemory().clear();
    + if (pos != conf.getPosBigTable()) {
    + firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
    + numPartitions = firstSmallTable.getHashPartitions().length;
    + break;
    + }
    + }
    + assert numPartitions != 0 : "Number of partitions must be greater than 0!";
    +
    + if (firstSmallTable.hasSpill()) {
    + spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
    + hybridMapJoinLeftover = true;
    +
    + // Clear all in-memory partitions first
    + for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    + MapJoinTableContainer tableContainer = mapJoinTables[pos];
    + if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
    + HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
    + hybridHtContainer.dumpStats();
    +
    + HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
    + // Clear all in memory partitions first
    + for (int i = 0; i < hashPartitions.length; i++) {
    + if (!hashPartitions[i].isHashMapOnDisk()) {
    + hybridHtContainer.setTotalInMemRowCount(
    + hybridHtContainer.getTotalInMemRowCount() -
    + hashPartitions[i].getHashMapFromMemory().getNumValues());
    + hashPartitions[i].getHashMapFromMemory().clear();
    + }
                    }
    + assert hybridHtContainer.getTotalInMemRowCount() == 0;
                  }
    - assert hybridHtContainer.getTotalInMemRowCount() == 0;
                }
    - }

    - // Reprocess the spilled data
    - for (int i = 0; i < numPartitions; i++) {
    - HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
    - if (hashPartitions[i].isHashMapOnDisk()) {
    - try {
    - continueProcess(i); // Re-process spilled data
    - } catch (IOException e) {
    - e.printStackTrace();
    - } catch (SerDeException e) {
    - e.printStackTrace();
    - } catch (ClassNotFoundException e) {
    - e.printStackTrace();
    - }
    - for (byte pos = 0; pos < order.length; pos++) {
    - if (pos != conf.getPosBigTable())
    - spilledMapJoinTables[pos] = null;
    + // Reprocess the spilled data
    + for (int i = 0; i < numPartitions; i++) {
    + HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
    + if (hashPartitions[i].isHashMapOnDisk()) {
    + try {
    + continueProcess(i); // Re-process spilled data
    + } catch (Exception e) {
    + throw new HiveException(e);
    + }
    + for (byte pos = 0; pos < order.length; pos++) {
    + if (pos != conf.getPosBigTable())
    + spilledMapJoinTables[pos] = null;
    + }
                  }
                }
              }
            }
    +
    + if (isLogInfoEnabled) {
    + LOG.info("spilled: " + spilled + " abort: " + abort + ". Clearing spilled partitions.");
    + }
    +
    + // spilled tables are always loaded (no sharing), so clear them
    + clearAllTableContainers();
    + cache.remove(cacheKey);
          }

    + // In the MapReduce case, we always need to clean up, since MapReduce has no object registry.
          if ((this.getExecContext() != null) && (this.getExecContext().getLocalWork() != null)
    - && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())
    - && mapJoinTables != null) {
    + && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())) {
    + if (isLogInfoEnabled) {
    + LOG.info("MR: Clearing all map join table containers.");
    + }
    + clearAllTableContainers();
    + }
    +
    + super.closeOp(abort);
    + }
    +
    + private void clearAllTableContainers() {
    + if (mapJoinTables != null) {
            for (MapJoinTableContainer tableContainer : mapJoinTables) {
              if (tableContainer != null) {
                tableContainer.clear();
              }
            }
          }
    - cache.release(cacheKey);
    - this.loader = null;
    - super.closeOp(abort);
        }

        /**

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
    index f0df2d3..440e0a1 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCache.java
    @@ -53,4 +53,11 @@ public interface ObjectCache {
         * @return the last cached object with the key, null if none.
         */
        public <T> Future<T> retrieveAsync(String key, Callable<T> fn) throws HiveException;
    +
    + /**
    + * Removes the specified key from the object cache.
    + *
    + * @param key - key to be removed
    + */
    + public void remove(String key);
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
    index a6f698d..bf4ae8d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ObjectCache.java
    @@ -91,4 +91,9 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
            }
          };
        }
    +
    + @Override
    + public void remove(String key) {
    + // nothing to do
    + }
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    index 3f6d61e..412226e 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
    @@ -183,6 +183,36 @@ public class HybridHashTableContainer
          public boolean isHashMapOnDisk() {
            return hashMapOnDisk;
          }
    +
    + public void clear() {
    + if (hashMap != null) {
    + hashMap.clear();
    + hashMap = null;
    + }
    +
    + if (hashMapLocalPath != null) {
    + try {
    + Files.delete(hashMapLocalPath);
    + } catch (Throwable ignored) {
    + }
    + hashMapLocalPath = null;
    + }
    +
    + if (sidefileKVContainer != null) {
    + sidefileKVContainer.clear();
    + sidefileKVContainer = null;
    + }
    +
    + if (matchfileObjContainer != null) {
    + matchfileObjContainer.clear();
    + matchfileObjContainer = null;
    + }
    +
    + if (matchfileRowBytesContainer != null) {
    + matchfileRowBytesContainer.clear();
    + matchfileRowBytesContainer = null;
    + }
    + }
        }

        public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
    @@ -546,12 +576,11 @@ public class HybridHashTableContainer
          return toSpillPartitionId;
        }

    - /* Clean up in memory hashtables */
        @Override
        public void clear() {
          for (HashPartition hp : hashPartitions) {
    - if (hp.hashMap != null) {
    - hp.hashMap.clear();
    + if (hp != null) {
    + hp.clear();
            }
          }
          memoryUsed = 0;

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    index 6a81f11..536b92c 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
    @@ -37,7 +37,6 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
      import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
      import org.apache.hadoop.hive.ql.metadata.HiveException;
      import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
    -import org.apache.hadoop.hive.serde2.SerDeException;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
      import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    @@ -181,10 +180,6 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
              }
              tableContainer.seal();
              mapJoinTables[pos] = tableContainer;
    - } catch (IOException e) {
    - throw new HiveException(e);
    - } catch (SerDeException e) {
    - throw new HiveException(e);
            } catch (Exception e) {
              throw new HiveException(e);
            }

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java
    index c0bcb21..64295d4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ObjectCache.java
    @@ -93,4 +93,10 @@ public class ObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
            }
          });
        }
    +
    + @Override
    + public void remove(String key) {
    + LOG.info("Removing key: " + key);
    + registry.delete(key);
    + }
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/07fcb098/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
    index c8359d3..1c91be6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
    @@ -290,7 +290,7 @@ public class VectorMapJoinRowBytesContainer {
          return currentLength;
        }

    - public void clear() throws IOException {
    + public void clear() {
          if (fileInputStream != null) {
            try {
              fileInputStream.close();
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10140 : Window boundary is not compared correctly (Aihua Xu via Ashutosh Chauhan)

    Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cccaa550
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cccaa550
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cccaa550

    Branch: refs/heads/llap
    Commit: cccaa5509e2f9948d6dd667b4d8fd6135469c806
    Parents: f895b27
    Author: Aihua Xu <aihuaxu@gmail.com>
    Authored: Thu Apr 30 09:42:00 2015 -0700
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Tue May 5 10:03:40 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/parse/WindowingSpec.java | 7 +-
      .../clientpositive/windowing_windowspec.q | 2 +
      .../clientpositive/windowing_windowspec.q.out | 108 +++++++++++++++++++
      3 files changed, 115 insertions(+), 2 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/cccaa550/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
    index 4fbb8b7..6dfa214 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java
    @@ -614,8 +614,10 @@ public class WindowingSpec {
            if (c != 0) {
              return c;
            }
    +
            RangeBoundarySpec rb = (RangeBoundarySpec) other;
    - return amt - rb.amt;
    + // Valid range is "range/rows between 10 preceding and 2 preceding" for the PRECEDING case
    + return this.direction == Direction.PRECEDING ? rb.amt - amt : amt - rb.amt;
          }

        }
    @@ -713,7 +715,8 @@ public class WindowingSpec {
              return c;
            }
            ValueBoundarySpec vb = (ValueBoundarySpec) other;
    - return amt - vb.amt;
    + // Valid range is "range/rows between 10 preceding and 2 preceding" for the PRECEDING case
    + return this.direction == Direction.PRECEDING ? vb.amt - amt : amt - vb.amt;
          }

        }
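
    Both hunks apply the same inversion: for PRECEDING boundaries a larger
    offset lies further before the current row, so "10 preceding" must order
    before "2 preceding". A hedged sketch of the corrected comparison (the
    Direction enum here is a simplified stand-in for WindowingSpec's, and the
    surrounding compareTo plumbing is omitted):

      enum Direction { PRECEDING, CURRENT, FOLLOWING }

      // Negative result means 'this' boundary starts earlier in the frame
      // than 'other'. For PRECEDING, larger offsets are earlier, so the
      // subtraction flips relative to the FOLLOWING case.
      static int compareAmt(Direction direction, int thisAmt, int otherAmt) {
        return direction == Direction.PRECEDING ? otherAmt - thisAmt
                                                : thisAmt - otherAmt;
      }

    With this ordering, "between 10 preceding and 2 preceding" is accepted:
    compareAmt(PRECEDING, 10, 2) = -8, so the start boundary correctly sorts
    before the end boundary.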

    http://git-wip-us.apache.org/repos/asf/hive/blob/cccaa550/ql/src/test/queries/clientpositive/windowing_windowspec.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec.q b/ql/src/test/queries/clientpositive/windowing_windowspec.q
    index 63f97b7..202eb74 100644
    --- a/ql/src/test/queries/clientpositive/windowing_windowspec.q
    +++ b/ql/src/test/queries/clientpositive/windowing_windowspec.q
    @@ -31,6 +31,8 @@ select s, sum(i) over(partition by ts order by s) from over10k limit 100;

      select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100;

    +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100;
    +
      select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7;

      select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7;

    http://git-wip-us.apache.org/repos/asf/hive/blob/cccaa550/ql/src/test/results/clientpositive/windowing_windowspec.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/windowing_windowspec.q.out b/ql/src/test/results/clientpositive/windowing_windowspec.q.out
    index 8d78c22..66b0b52 100644
    --- a/ql/src/test/results/clientpositive/windowing_windowspec.q.out
    +++ b/ql/src/test/results/clientpositive/windowing_windowspec.q.out
    @@ -800,6 +800,114 @@ POSTHOOK: Input: default@over10k
      71.68 722.6499947607517
      79.46 802.1099938452244
      80.02 882.1299904882908
    +PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@over10k
    +#### A masked pattern was here ####
    +POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@over10k
    +#### A masked pattern was here ####
    +3.17 14.0600004196167
    +10.89 28.600000381469727
    +14.54 43.38000011444092
    +14.78 58.0600004196167
    +17.85 67.78000068664551
    +20.61 81.9300012588501
    +28.69 96.3700008392334
    +29.22 109.69000053405762
    +31.17 127.42999839782715
    +38.35 137.3499984741211
    +38.61 147.60999870300293
    +39.48 156.97999954223633
    +40.54 160.22999954223633
    +41.6 167.70000076293945
    +46.08 182.5800018310547
    +54.36 198.97999954223633
    +56.94 222.3400001525879
    +64.96 249.7799949645996
    +73.52 273.99999618530273
    +78.58 298.4700012207031
    +81.41 318.2200012207031
    +84.71 332.1300048828125
    +87.43 344.9100036621094
    +91.36 356.45999908447266
    +92.96 366.79000091552734
    +95.04 279.36000061035156
    +0.83 2.8199999928474426
    +1.99 6.550000011920929
    +3.73 15.409999668598175
    +8.86 25.199999570846558
    +10.62 34.52999925613403
    +11.32 43.6299991607666
    +12.83 49.46999931335449
    +14.7 53.80999946594238
    +14.96 60.06999969482422
    +17.58 66.34000015258789
    +19.1 72.65000057220459
    +21.01 84.64000129699707
    +26.95 94.29000091552734
    +27.23 104.26000022888184
    +29.07 112.95999908447266
    +29.71 117.8499984741211
    +31.84 122.55999946594238
    +31.94 128.80999946594238
    +35.32 136.42000007629395
    +37.32 143.07999992370605
    +38.5 153.22000122070312
    +42.08 162.20000076293945
    +44.3 169.54000091552734
    +44.66 177.88000106811523
    +46.84 184.68999862670898
    +48.89 190.02999877929688
    +49.64 195.64999771118164
    +50.28 200.89999771118164
    +52.09 205.2699966430664
    +53.26 209.71999740600586
    +54.09 215.88999938964844
    +56.45 220.55999755859375
    +56.76 228.70999908447266
    +61.41 236.5
    +61.88 243.07999801635742
    +63.03 250.87000274658203
    +64.55 258.08000564575195
    +68.62 272.3300018310547
    +76.13 288.3500061035156
    +79.05 304.2300033569336
    +80.43 317.02000427246094
    +81.41 323.74000549316406
    +82.85 328.67000579833984
    +83.98 332.4500045776367
    +84.21 336.59000396728516
    +85.55 341.67000579833984
    +87.93 346.62000274658203
    +88.93 356.6800003051758
    +94.27 370.57999420166016
    +99.45 282.6499938964844
    +0.36 0.8400000035762787
    +0.48 1.6300000250339508
    +0.79 2.9000000059604645
    +1.27 7.020000010728836
    +4.48 15.540000021457672
    +9.0 38.02000045776367
    +23.27 61.87999963760376
    +25.13 82.73999977111816
    +25.34 99.64999961853027
    +25.91 105.38999938964844
    +29.01 110.72999954223633
    +30.47 123.34000015258789
    +37.95 136.72999954223633
    +39.3 153.6299991607666
    +45.91 175.5999984741211
    +52.44 191.74999618530273
    +54.1 209.14999771118164
    +56.7 222.0099983215332
    +58.77 231.6599998474121
    +62.09 245.7599983215332
    +68.2 260.73999786376953
    +71.68 281.4299964904785
    +79.46 299.35999298095703
    +80.02 312.4499969482422
      PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7
      PREHOOK: type: QUERY
      PREHOOK: Input: default@over10k
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10583 : Switch precommit from ASF to Github repo to avoid clone failures (Szehon, reviewed by Xuefu)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2baa1e99
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2baa1e99
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2baa1e99

    Branch: refs/heads/llap
    Commit: 2baa1e99c474901f7a123c70d6abaadd6711c599
    Parents: 07fcb09
    Author: Szehon Ho <szehon@cloudera.com>
    Authored: Sun May 3 00:26:51 2015 -0700
    Committer: Szehon Ho <szehon@cloudera.com>
    Committed: Sun May 3 00:27:40 2015 -0700

    ----------------------------------------------------------------------
      dev-support/jenkins-execute-build.sh | 2 +-
      dev-support/jenkins-execute-hms-test.sh | 4 ++--
      2 files changed, 3 insertions(+), 3 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/2baa1e99/dev-support/jenkins-execute-build.sh
    ----------------------------------------------------------------------
    diff --git a/dev-support/jenkins-execute-build.sh b/dev-support/jenkins-execute-build.sh
    index 492aa5f..3b41b0b 100644
    --- a/dev-support/jenkins-execute-build.sh
    +++ b/dev-support/jenkins-execute-build.sh
    @@ -51,7 +51,7 @@ fi
      test -d hive/build/ || mkdir -p hive/build/
      cd hive/build/
      rm -rf hive
    -git clone --depth 1 https://git-wip-us.apache.org/repos/asf/hive.git
    +git clone --depth 1 https://github.com/apache/hive.git
      cd hive/testutils/ptest2

      mvn clean package -DskipTests -Drat.numUnapprovedLicenses=1000 -Dmaven.repo.local=$WORKSPACE/.m2

    http://git-wip-us.apache.org/repos/asf/hive/blob/2baa1e99/dev-support/jenkins-execute-hms-test.sh
    ----------------------------------------------------------------------
    diff --git a/dev-support/jenkins-execute-hms-test.sh b/dev-support/jenkins-execute-hms-test.sh
    index 584f517..52a9453 100644
    --- a/dev-support/jenkins-execute-hms-test.sh
    +++ b/dev-support/jenkins-execute-hms-test.sh
    @@ -95,7 +95,7 @@ build_ptest2() {

       test -d $path || mkdir -p $path
       rm -rf $path
    - git clone --depth 1 -b $BRANCH https://git-wip-us.apache.org/repos/asf/hive.git $path/ || return 1
    + git clone --depth 1 -b $BRANCH https://github.com/apache/hive.git $path/ || return 1
       cd $path/testutils/ptest2
       mvn clean package -DskipTests -Drat.numUnapprovedLicenses=1000 -Dmaven.repo.local=$WORKSPACE/.m2 || return 1

    @@ -170,7 +170,7 @@ create_publish_file() {
      if patch_contains_hms_upgrade "$PATCH_URL"; then
       ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i $SSH_KEY $SSH_HOST "
        rm -rf hive/ &&
    - git clone --depth 1 -b $BRANCH https://git-wip-us.apache.org/repos/asf/hive.git &&
    + git clone --depth 1 -b $BRANCH https://github.com/apache/hive.git &&
        cd hive/ &&
        curl ${PATCH_URL} | bash -x testutils/ptest2/src/main/resources/smart-apply-patch.sh - &&
        sudo bash -x testutils/metastore/execute-test-on-lxc.sh --patch \"${PATCH_URL}\" --branch $BRANCH
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-8890: HiveServer2 dynamic service discovery: use persistent ephemeral nodes curator recipe (Vaibhav Gumashta reviewed by Thejas Nair)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/652febcd
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/652febcd
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/652febcd

    Branch: refs/heads/llap
    Commit: 652febcdab727f39c05d6b5b3c0a6526d254ee0e
    Parents: cccaa55
    Author: Vaibhav Gumashta <vgumashta@apache.org>
    Authored: Tue May 5 10:37:51 2015 -0700
    Committer: Vaibhav Gumashta <vgumashta@apache.org>
    Committed: Tue May 5 10:37:51 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
      pom.xml | 5 +
      service/pom.xml | 5 +
      .../cli/thrift/ThriftBinaryCLIService.java | 1 -
      .../apache/hive/service/server/HiveServer2.java | 106 +++++++++++++++----
      5 files changed, 97 insertions(+), 22 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/652febcd/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index f04ce82..5d4dbea 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -1410,7 +1410,7 @@ public class HiveConf extends Configuration {
              "The port of ZooKeeper servers to talk to.\n" +
              "If the list of Zookeeper servers specified in hive.zookeeper.quorum\n" +
              "does not contain port numbers, this value is used."),
    - HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", "600000ms",
    + HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", "1200000ms",
              new TimeValidator(TimeUnit.MILLISECONDS),
              "ZooKeeper client's session timeout (in milliseconds). The client is disconnected, and as a result, all locks released, \n" +
              "if a heartbeat is not sent in the timeout."),

    http://git-wip-us.apache.org/repos/asf/hive/blob/652febcd/pom.xml
    ----------------------------------------------------------------------
    diff --git a/pom.xml b/pom.xml
    index acacf81..1921b06 100644
    --- a/pom.xml
    +++ b/pom.xml
    @@ -512,6 +512,11 @@
              <version>${curator.version}</version>
            </dependency>
            <dependency>
    + <groupId>org.apache.curator</groupId>
    + <artifactId>curator-recipes</artifactId>
    + <version>${curator.version}</version>
    + </dependency>
    + <dependency>
              <groupId>org.codehaus.groovy</groupId>
              <artifactId>groovy-all</artifactId>
              <version>${groovy.version}</version>

    http://git-wip-us.apache.org/repos/asf/hive/blob/652febcd/service/pom.xml
    ----------------------------------------------------------------------
    diff --git a/service/pom.xml b/service/pom.xml
    index c5815af..d8e3126 100644
    --- a/service/pom.xml
    +++ b/service/pom.xml
    @@ -91,6 +91,11 @@
            <artifactId>curator-framework</artifactId>
            <version>${curator.version}</version>
          </dependency>
    + <dependency>
    + <groupId>org.apache.curator</groupId>
    + <artifactId>curator-recipes</artifactId>
    + <version>${curator.version}</version>
    + </dependency>
          <!-- intra-project -->
          <dependency>
            <groupId>org.apache.hive</groupId>

    http://git-wip-us.apache.org/repos/asf/hive/blob/652febcd/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    index ca1eae6..6c9efba 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java
    @@ -93,7 +93,6 @@ public class ThriftBinaryCLIService extends ThriftCLIService {
            // TCP Server
            server = new TThreadPoolServer(sargs);
            server.setServerEventHandler(serverEventHandler);
    - server.serve();
            String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port "
                + portNum + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads";
            LOG.info(msg);

    http://git-wip-us.apache.org/repos/asf/hive/blob/652febcd/service/src/java/org/apache/hive/service/server/HiveServer2.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
    index dc2217f..58e8e49 100644
    --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
    +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
    @@ -23,6 +23,8 @@ import java.nio.charset.Charset;
      import java.util.ArrayList;
      import java.util.List;
      import java.util.Properties;
    +import java.util.concurrent.CountDownLatch;
    +import java.util.concurrent.TimeUnit;

      import org.apache.commons.cli.GnuParser;
      import org.apache.commons.cli.HelpFormatter;
    @@ -35,6 +37,10 @@ import org.apache.commons.logging.LogFactory;
      import org.apache.curator.framework.CuratorFramework;
      import org.apache.curator.framework.CuratorFrameworkFactory;
      import org.apache.curator.framework.api.ACLProvider;
    +import org.apache.curator.framework.api.BackgroundCallback;
    +import org.apache.curator.framework.api.CuratorEvent;
    +import org.apache.curator.framework.api.CuratorEventType;
    +import org.apache.curator.framework.recipes.nodes.PersistentEphemeralNode;
      import org.apache.curator.retry.ExponentialBackoffRetry;
      import org.apache.hadoop.hive.common.LogUtils;
      import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
    @@ -67,9 +73,11 @@ import org.apache.zookeeper.data.ACL;
       */
      public class HiveServer2 extends CompositeService {
        private static final Log LOG = LogFactory.getLog(HiveServer2.class);
    + private static CountDownLatch deleteSignal;

        private CLIService cliService;
        private ThriftCLIService thriftCLIService;
    + private PersistentEphemeralNode znode;
        private String znodePath;
        private CuratorFramework zooKeeperClient;
        private boolean registeredWithZooKeeper = false;
    @@ -151,12 +159,19 @@ public class HiveServer2 extends CompositeService {
          String instanceURI = getServerInstanceURI(hiveConf);
          byte[] znodeDataUTF8 = instanceURI.getBytes(Charset.forName("UTF-8"));
          setUpZooKeeperAuth(hiveConf);
    + int sessionTimeout =
    + (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT,
    + TimeUnit.MILLISECONDS);
    + int baseSleepTime =
    + (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME,
    + TimeUnit.MILLISECONDS);
    + int maxRetries = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES);
          // Create a CuratorFramework instance to be used as the ZooKeeper client
          // Use the zooKeeperAclProvider to create appropriate ACLs
          zooKeeperClient =
              CuratorFrameworkFactory.builder().connectString(zooKeeperEnsemble)
    - .aclProvider(zooKeeperAclProvider).retryPolicy(new ExponentialBackoffRetry(1000, 3))
    - .build();
    + .sessionTimeoutMs(sessionTimeout).aclProvider(zooKeeperAclProvider)
    + .retryPolicy(new ExponentialBackoffRetry(baseSleepTime, maxRetries)).build();
          zooKeeperClient.start();
          // Create the parent znodes recursively; ignore if the parent already exists.
          try {
    @@ -176,18 +191,28 @@ public class HiveServer2 extends CompositeService {
                ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace
                    + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + "serverUri=" + instanceURI + ";"
                    + "version=" + HiveVersionInfo.getVersion() + ";" + "sequence=";
    - znodePath =
    - zooKeeperClient.create().creatingParentsIfNeeded()
    - .withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(pathPrefix, znodeDataUTF8);
    + znode =
    + new PersistentEphemeralNode(zooKeeperClient,
    + PersistentEphemeralNode.Mode.EPHEMERAL_SEQUENTIAL, pathPrefix, znodeDataUTF8);
    + znode.start();
    + // We'll wait for 120s for node creation
    + long znodeCreationTimeout = 120;
    + if (!znode.waitForInitialCreate(znodeCreationTimeout, TimeUnit.SECONDS)) {
    + throw new Exception("Max znode creation wait time: " + znodeCreationTimeout + "s exhausted");
    + }
            setRegisteredWithZooKeeper(true);
    + znodePath = znode.getActualPath();
            // Set a watch on the znode
            if (zooKeeperClient.checkExists().usingWatcher(new DeRegisterWatcher()).forPath(znodePath) == null) {
              // No node exists, throw exception
              throw new Exception("Unable to create znode for this HiveServer2 instance on ZooKeeper.");
            }
            LOG.info("Created a znode on ZooKeeper for HiveServer2 uri: " + instanceURI);
    - } catch (KeeperException e) {
    + } catch (Exception e) {
            LOG.fatal("Unable to create a znode for this server instance", e);
    + if (znode != null) {
    + znode.close();
    + }
            throw (e);
          }
        }
    @@ -223,22 +248,33 @@ public class HiveServer2 extends CompositeService {
          @Override
          public void process(WatchedEvent event) {
            if (event.getType().equals(Watcher.Event.EventType.NodeDeleted)) {
    - HiveServer2.this.setRegisteredWithZooKeeper(false);
    - // If there are no more active client sessions, stop the server
    - if (cliService.getSessionManager().getOpenSessionCount() == 0) {
    - LOG.warn("This instance of HiveServer2 has been removed from the list of server "
    - + "instances available for dynamic service discovery. "
    - + "The last client session has ended - will shutdown now.");
    - HiveServer2.this.stop();
    + if (znode != null) {
    + try {
    + znode.close();
    + LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. "
    + + "The server will be shut down after the last client sesssion completes.");
    + } catch (IOException e) {
    + LOG.error("Failed to close the persistent ephemeral znode", e);
    + } finally {
    + HiveServer2.this.setRegisteredWithZooKeeper(false);
    + // If there are no more active client sessions, stop the server
    + if (cliService.getSessionManager().getOpenSessionCount() == 0) {
    + LOG.warn("This instance of HiveServer2 has been removed from the list of server "
    + + "instances available for dynamic service discovery. "
    + + "The last client session has ended - will shutdown now.");
    + HiveServer2.this.stop();
    + }
    + }
              }
    - LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. "
    - + "The server will be shut down after the last client sesssion completes.");
            }
          }
        }

        private void removeServerInstanceFromZooKeeper() throws Exception {
          setRegisteredWithZooKeeper(false);
    + if (znode != null) {
    + znode.close();
    + }
          zooKeeperClient.close();
          LOG.info("Server instance removed from ZooKeeper.");
        }
    @@ -359,25 +395,53 @@ public class HiveServer2 extends CompositeService {
          HiveConf hiveConf = new HiveConf();
          String zooKeeperEnsemble = ZooKeeperHiveHelper.getQuorumServers(hiveConf);
          String rootNamespace = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE);
    + int baseSleepTime = (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME, TimeUnit.MILLISECONDS);
    + int maxRetries = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES);
          CuratorFramework zooKeeperClient =
              CuratorFrameworkFactory.builder().connectString(zooKeeperEnsemble)
    - .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build();
    + .retryPolicy(new ExponentialBackoffRetry(baseSleepTime, maxRetries)).build();
          zooKeeperClient.start();
          List<String> znodePaths =
              zooKeeperClient.getChildren().forPath(
                  ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace);
    + List<String> znodePathsUpdated;
          // Now for each path that is for the given versionNumber, delete the znode from ZooKeeper
    - for (String znodePath : znodePaths) {
    + for (int i = 0; i < znodePaths.size(); i++) {
    + String znodePath = znodePaths.get(i);
    + deleteSignal = new CountDownLatch(1);
            if (znodePath.contains("version=" + versionNumber + ";")) {
    - LOG.info("Removing the znode: " + znodePath + " from ZooKeeper");
    - zooKeeperClient.delete().forPath(
    + String fullZnodePath =
                  ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace
    - + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + znodePath);
    + + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + znodePath;
    + LOG.warn("Will attempt to remove the znode: " + fullZnodePath + " from ZooKeeper");
    + System.out.println("Will attempt to remove the znode: " + fullZnodePath + " from ZooKeeper");
    + zooKeeperClient.delete().guaranteed().inBackground(new DeleteCallBack())
    + .forPath(fullZnodePath);
    + // Wait for the delete to complete
    + deleteSignal.await();
    + // Get the updated path list
    + znodePathsUpdated =
    + zooKeeperClient.getChildren().forPath(
    + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace);
    + // Gives a list of any new paths that may have been created to maintain the persistent ephemeral node
    + znodePathsUpdated.removeAll(znodePaths);
    + // Add the new paths to the znodes list. We'll try for their removal as well.
    + znodePaths.addAll(znodePathsUpdated);
            }
          }
          zooKeeperClient.close();
        }

    + private static class DeleteCallBack implements BackgroundCallback {
    + @Override
    + public void processResult(CuratorFramework zooKeeperClient, CuratorEvent event)
    + throws Exception {
    + if (event.getType() == CuratorEventType.DELETE) {
    + deleteSignal.countDown();
    + }
    + }
    + }
    +
        public static void main(String[] args) {
          HiveConf.setLoadHiveServer2Config(true);
          try {
    @@ -547,6 +611,8 @@ public class HiveServer2 extends CompositeService {
            } catch (Exception e) {
              LOG.fatal("Error deregistering HiveServer2 instances for version: " + versionNumber
                  + " from ZooKeeper", e);
    + System.out.println("Error deregistering HiveServer2 instances for version: " + versionNumber
    + + " from ZooKeeper." + e);
              System.exit(-1);
            }
            System.exit(0);
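
    The heart of the change is swapping the one-shot EPHEMERAL_SEQUENTIAL
    create for curator-recipes' PersistentEphemeralNode, which re-creates the
    znode after a session loss. That is also why deregistration must close()
    the node (stopping the recipe) rather than just delete the path, and why
    the deregister tool re-lists children after each delete to catch nodes the
    recipe may have re-created. A minimal sketch of the recipe's lifecycle,
    with placeholder connect string, path, and payload:

      import java.nio.charset.StandardCharsets;
      import java.util.concurrent.TimeUnit;
      import org.apache.curator.framework.CuratorFramework;
      import org.apache.curator.framework.CuratorFrameworkFactory;
      import org.apache.curator.framework.recipes.nodes.PersistentEphemeralNode;
      import org.apache.curator.retry.ExponentialBackoffRetry;

      public class ZkRegistrationSketch {
        public static void main(String[] args) throws Exception {
          CuratorFramework client = CuratorFrameworkFactory.builder()
              .connectString("zk1:2181")                        // placeholder quorum
              .retryPolicy(new ExponentialBackoffRetry(1000, 3))
              .build();
          client.start();

          // The recipe keeps this ephemeral-sequential node alive across
          // ZooKeeper session expirations by re-creating it as needed.
          PersistentEphemeralNode node = new PersistentEphemeralNode(client,
              PersistentEphemeralNode.Mode.EPHEMERAL_SEQUENTIAL,
              "/hiveserver2-sketch/serverUri=host:10000;sequence=", // placeholder
              "host:10000".getBytes(StandardCharsets.UTF_8));
          node.start();
          if (!node.waitForInitialCreate(120, TimeUnit.SECONDS)) {
            throw new IllegalStateException("znode creation timed out");
          }

          // Shutdown: close() stops the recipe so the node is not re-created;
          // deleting the path alone would race the recipe's re-create.
          node.close();
          client.close();
        }
      }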
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10579 : Fix hadoop-1 build (Xuefu via Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8eb0ede8
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8eb0ede8
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8eb0ede8

    Branch: refs/heads/llap
    Commit: 8eb0ede826663d02e8972a26145e8b55f2349995
    Parents: 2baa1e9
    Author: Szehon Ho <szehon@cloudera.com>
    Authored: Sun May 3 17:06:36 2015 -0700
    Committer: Szehon Ho <szehon@cloudera.com>
    Committed: Sun May 3 17:06:36 2015 -0700

    ----------------------------------------------------------------------
      .../hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java | 3 ++-
      1 file changed, 2 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/8eb0ede8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
    index 7cb0f15..3c8c0d6 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
    @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
      import org.apache.hadoop.hive.ql.plan.OperatorDesc;
      import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
      import org.apache.hadoop.hive.ql.plan.SelectDesc;
    +import org.apache.hadoop.hive.shims.ShimLoader;

      /**
       * This transformation does optimization for enforcing bucketing and sorting.
    @@ -216,7 +217,7 @@ public class BucketingSortingReduceSinkOptimizer implements Transform {
          private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
            Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
            for (int pos = 0; pos < srcs.length; pos++) {
    - if(!srcs[pos].isFile()) {
    + if (ShimLoader.getHadoopShims().isDirectory(srcs[pos])) {
                throw new RuntimeException("Was expecting '" + srcs[pos].getPath() + "' to be bucket file.");
              }
              bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
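
    The hadoop-1 breakage is that FileStatus.isFile() is not available there,
    so the check is routed through HadoopShims, which carries one
    implementation per Hadoop line. A simplified stand-in for that indirection
    (interface and class names below are illustrative, not Hive's shim classes):

      import org.apache.hadoop.fs.FileStatus;

      // Version-specific filesystem calls live behind a small interface.
      interface FsShimsSketch {
        boolean isDirectory(FileStatus status);
      }

      // hadoop-1 flavor: only the old isDir() is available.
      class Hadoop1ShimsSketch implements FsShimsSketch {
        @SuppressWarnings("deprecation")
        public boolean isDirectory(FileStatus status) {
          return status.isDir();
        }
      }

      // hadoop-2 flavor: the modern API exists.
      class Hadoop2ShimsSketch implements FsShimsSketch {
        public boolean isDirectory(FileStatus status) {
          return status.isDirectory();
        }
      }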
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-8915 Log file explosion due to non-existence of COMPACTION_QUEUE table (Alan Gates, reviewed by Eugene Koifman)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c40c6deb
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c40c6deb
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c40c6deb

    Branch: refs/heads/llap
    Commit: c40c6deb65f30897b9967439edef47a5e740c26e
    Parents: 3f72f81
    Author: Alan Gates <gates@hortonworks.com>
    Authored: Tue May 5 09:10:33 2015 -0700
    Committer: Alan Gates <gates@hortonworks.com>
    Committed: Tue May 5 09:10:33 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/txn/compactor/Cleaner.java | 18 ++++++++----
      .../hadoop/hive/ql/txn/compactor/Worker.java | 30 ++++++++++++++------
      2 files changed, 33 insertions(+), 15 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/c40c6deb/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
    index 0fb39f7..83b0d3d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
    @@ -70,10 +70,10 @@ public class Cleaner extends CompactorThread {
            // and if so remembers that and then sets it to true at the end. We have to check here
            // first to make sure we go through a complete iteration of the loop before resetting it.
            boolean setLooped = !looped.get();
    + long startedAt = System.currentTimeMillis();
            // Make sure nothing escapes this run method and kills the metastore at large,
            // so wrap it in a big catch Throwable statement.
            try {
    - long startedAt = System.currentTimeMillis();

              // First look for all the compactions that are waiting to be cleaned. If we have not
              // seen an entry before, look for all the locks held on that table or partition and
    @@ -134,11 +134,6 @@ public class Cleaner extends CompactorThread {
                  }
                }
              }
    -
    - // Now, go back to bed until it's time to do this again
    - long elapsedTime = System.currentTimeMillis() - startedAt;
    - if (elapsedTime >= cleanerCheckInterval || stop.get()) continue;
    - else Thread.sleep(cleanerCheckInterval - elapsedTime);
            } catch (Throwable t) {
              LOG.error("Caught an exception in the main loop of compactor cleaner, " +
                  StringUtils.stringifyException(t));
    @@ -146,6 +141,17 @@ public class Cleaner extends CompactorThread {
            if (setLooped) {
              looped.set(true);
            }
    + // Now, go back to bed until it's time to do this again
    + long elapsedTime = System.currentTimeMillis() - startedAt;
    + if (elapsedTime >= cleanerCheckInterval || stop.get()) {
    + continue;
    + } else {
    + try {
    + Thread.sleep(cleanerCheckInterval - elapsedTime);
    + } catch (InterruptedException ie) {
    + // What can I do about it?
    + }
    + }
          } while (!stop.get());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/c40c6deb/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
    index 8cfa37e..3ce9ffd 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
    @@ -73,10 +73,11 @@ public class Worker extends CompactorThread {

        @Override
        public void run() {
    - // Make sure nothing escapes this run method and kills the metastore at large,
    - // so wrap it in a big catch Throwable statement.
    - try {
    - do {
    + do {
    + boolean launchedJob = false;
    + // Make sure nothing escapes this run method and kills the metastore at large,
    + // so wrap it in a big catch Throwable statement.
    + try {
              CompactionInfo ci = txnHandler.findNextToCompact(name);

              if (ci == null && !stop.get()) {
    @@ -143,6 +144,7 @@ public class Worker extends CompactorThread {
              final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf,
                runJobAsSelf(runAs) ? runAs : t.getOwner());
              final CompactorMR mr = new CompactorMR();
    + launchedJob = true;
              try {
                if (runJobAsSelf(runAs)) {
                  mr.run(conf, jobName.toString(), t, sd, txns, isMajor, su);
    @@ -163,11 +165,21 @@ public class Worker extends CompactorThread {
                    ". Marking clean to avoid repeated failures, " + StringUtils.stringifyException(e));
                txnHandler.markCleaned(ci);
              }
    - } while (!stop.get());
    - } catch (Throwable t) {
    - LOG.error("Caught an exception in the main loop of compactor worker " + name +
    - ", exiting " + StringUtils.stringifyException(t));
    - }
    + } catch (Throwable t) {
    + LOG.error("Caught an exception in the main loop of compactor worker " + name + ", " +
    + StringUtils.stringifyException(t));
    + }
    +
    + // If we didn't try to launch a job it either means there was no work to do or we got
    + // here as the result of a communication failure with the DB. Either way we want to wait
    + // a bit before we restart the loop.
    + if (!launchedJob && !stop.get()) {
    + try {
    + Thread.sleep(SLEEP_TIME);
    + } catch (InterruptedException e) {
    + }
    + }
    + } while (!stop.get());
        }

        @Override
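
    Cleaner and Worker end up with the same loop shape: the catch-Throwable
    moves inside the loop so a single failure (such as the missing
    COMPACTION_QUEUE table) cannot kill the thread, and the back-off sleep now
    always runs when no job was launched, which is what stops the tight retry
    loop that flooded the logs. The shape as a small sketch, with placeholder
    helpers (tryWorkOnce(), stopRequested(), and SLEEP_MS are illustrative):

      // Sketch of the hardened polling loop; helper names are placeholders.
      abstract class PollingLoopSketch {
        static final long SLEEP_MS = 5000;               // assumed poll interval
        abstract boolean tryWorkOnce() throws Exception; // may throw, e.g. DB errors
        abstract boolean stopRequested();

        void runLoop() {
          while (!stopRequested()) {
            boolean launchedJob = false;
            try {
              launchedJob = tryWorkOnce();
            } catch (Throwable t) {
              // Log and fall through: the loop itself must survive any failure.
              System.err.println("iteration failed: " + t);
            }
            if (!launchedJob && !stopRequested()) {
              try {
                Thread.sleep(SLEEP_MS);                  // back off instead of spinning
              } catch (InterruptedException ignored) {
              }
            }
          }
        }
      }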
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10529: Remove references to tez task context before storing operator plan in object cache (Rajesh Balamohan reviewed by Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4aff07e3
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4aff07e3
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4aff07e3

    Branch: refs/heads/llap
    Commit: 4aff07e3e8da9b6f946df605e369f1054e76823a
    Parents: e2a12c9
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Tue May 5 11:04:54 2015 -0700
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Tue May 5 11:04:54 2015 -0700

    ----------------------------------------------------------------------
      ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java | 3 ++-
      1 file changed, 2 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/4aff07e3/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    index 1cfc411..b1352f3 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    @@ -77,7 +77,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
        private transient String cacheKey;
        private transient ObjectCache cache;

    - private HashTableLoader loader;
    + protected HashTableLoader loader;
        private boolean loadCalled;

        protected transient MapJoinTableContainer[] mapJoinTables;
    @@ -528,6 +528,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
            clearAllTableContainers();
          }

    + this.loader = null;
          super.closeOp(abort);
        }
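
    The one-line null-out matters because the operator plan is returned to the
    shared object cache and reused by later tasks: a field that still points at
    the Tez task context (here, the HashTableLoader) would pin that context in
    memory across task attempts. A schematic sketch of the hazard, with
    placeholder types:

      // Placeholder types: objects stored in a long-lived cache must not
      // retain task-scoped helpers, or the whole task context leaks with them.
      class TaskScopedLoader { /* holds a reference to the task context */ }

      class CachedOperatorSketch {
        private TaskScopedLoader loader;  // task-scoped; must not be cached

        void closeOp() {
          loader = null;  // drop task-scoped references before re-caching
        }
      }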
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-9508: MetaStore client socket connection should have a lifetime (Thiruvel Thirumoolan reviewed by Vaibhav Gumashta)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2a12c9a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2a12c9a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2a12c9a

    Branch: refs/heads/llap
    Commit: e2a12c9a630d037f3e3aaf42acc873eac86bc9f3
    Parents: 652febc
    Author: Vaibhav Gumashta <vgumashta@apache.org>
    Authored: Tue May 5 10:44:16 2015 -0700
    Committer: Vaibhav Gumashta <vgumashta@apache.org>
    Committed: Tue May 5 10:44:16 2015 -0700

    ----------------------------------------------------------------------
      .../org/apache/hadoop/hive/conf/HiveConf.java | 6 ++++
      .../hive/metastore/TestHiveMetaStore.java | 30 ++++++++++++++++++++
      .../hive/metastore/RetryingMetaStoreClient.java | 27 +++++++++++++++---
      3 files changed, 59 insertions(+), 4 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/e2a12c9a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------
    diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    index 5d4dbea..69fda45 100644
    --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    @@ -119,6 +119,7 @@ public class HiveConf extends Configuration {
            HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES,
            HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY,
            HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT,
    + HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME,
            HiveConf.ConfVars.METASTOREPWD,
            HiveConf.ConfVars.METASTORECONNECTURLHOOK,
            HiveConf.ConfVars.METASTORECONNECTURLKEY,
    @@ -398,6 +399,11 @@ public class HiveConf extends Configuration {
          METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s",
              new TimeValidator(TimeUnit.SECONDS),
              "MetaStore Client socket timeout in seconds"),
    + METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s",
    + new TimeValidator(TimeUnit.SECONDS),
    + "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" +
    + "reconnects on the next MetaStore operation. A value of 0s means the connection\n" +
    + "has an infinite lifetime."),
          METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine",
              "password to use against metastore database"),
          METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "",

    http://git-wip-us.apache.org/repos/asf/hive/blob/e2a12c9a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    ----------------------------------------------------------------------
    diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    index 130fd67..dffeb34 100644
    --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    @@ -2867,4 +2867,34 @@ public abstract class TestHiveMetaStore extends TestCase {
              ownerName, ownerType, createTime, functionType, resources);
          client.createFunction(func);
        }
    +
    + public void testRetriableClientWithConnLifetime() throws Exception {
    +
    + HiveConf conf = new HiveConf(hiveConf);
    + conf.setLong(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME.name(), 60);
    + long timeout = 65 * 1000; // Let's use a timeout longer than the socket lifetime to simulate a reconnect
    +
    + // Test a normal retriable client
    + IMetaStoreClient client = RetryingMetaStoreClient.getProxy(conf, getHookLoader(), HiveMetaStoreClient.class.getName());
    + client.getAllDatabases();
    + client.close();
    +
    + // Connect after the lifetime, there should not be any failures
    + client = RetryingMetaStoreClient.getProxy(conf, getHookLoader(), HiveMetaStoreClient.class.getName());
    + Thread.sleep(timeout);
    + client.getAllDatabases();
    + client.close();
    + }
    +
    + private HiveMetaHookLoader getHookLoader() {
    + HiveMetaHookLoader hookLoader = new HiveMetaHookLoader() {
    + @Override
    + public HiveMetaHook getHook(
    + org.apache.hadoop.hive.metastore.api.Table tbl)
    + throws MetaException {
    + return null;
    + }
    + };
    + return hookLoader;
    + }
      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/e2a12c9a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
    ----------------------------------------------------------------------
    diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
    index 77da6f7..1b6487a 100644
    --- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
    +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
    @@ -54,9 +54,9 @@ public class RetryingMetaStoreClient implements InvocationHandler {
        private final int retryLimit;
        private final long retryDelaySeconds;
        private final Map<String, Long> metaCallTimeMap;
    -
    -
    -
    + private final long connectionLifeTimeInMillis;
    + private long lastConnectionTime;
    + private boolean localMetaStore;

        protected RetryingMetaStoreClient(HiveConf hiveConf, HiveMetaHookLoader hookLoader,
            Map<String, Long> metaCallTimeMap, Class<? extends IMetaStoreClient> msClientClass) throws MetaException {
    @@ -64,6 +64,11 @@ public class RetryingMetaStoreClient implements InvocationHandler {
          this.retryDelaySeconds = hiveConf.getTimeVar(
              HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, TimeUnit.SECONDS);
          this.metaCallTimeMap = metaCallTimeMap;
    + this.connectionLifeTimeInMillis =
    + hiveConf.getTimeVar(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME, TimeUnit.SECONDS) * 1000;
    + this.lastConnectionTime = System.currentTimeMillis();
    + String msUri = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS);
    + localMetaStore = (msUri == null) || msUri.trim().isEmpty();

          reloginExpiringKeytabUser();
          this.base = MetaStoreUtils.newInstance(msClientClass, new Class[] {
    @@ -104,8 +109,9 @@ public class RetryingMetaStoreClient implements InvocationHandler {
          while (true) {
            try {
              reloginExpiringKeytabUser();
    - if(retriesMade > 0){
    + if (retriesMade > 0 || hasConnectionLifeTimeReached(method)) {
                base.reconnect();
    + lastConnectionTime = System.currentTimeMillis();
              }
              if (metaCallTimeMap == null) {
                ret = method.invoke(base, args);
    @@ -171,6 +177,19 @@ public class RetryingMetaStoreClient implements InvocationHandler {
          return methodSb.toString();
        }

    + private boolean hasConnectionLifeTimeReached(Method method) {
    + if (connectionLifeTimeInMillis <= 0 || localMetaStore ||
    + method.getName().equalsIgnoreCase("close")) {
    + return false;
    + }
    + boolean shouldReconnect =
    + (System.currentTimeMillis() - lastConnectionTime) >= connectionLifeTimeInMillis;
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Reconnection status for Method: " + method.getName() + " is " + shouldReconnect);
    + }
    + return shouldReconnect;
    + }
    +
        /**
         * Relogin if login user is logged in using keytab
         * Relogin is actually done by ugi code only if sufficient time has passed
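
    The lifetime check plugs into the existing dynamic-proxy retry path:
    before each call the handler reconnects once the socket has outlived its
    configured lifetime, skipping close() (a reconnect there is wasted work)
    and embedded metastores (no socket at all). The same technique as a
    standalone sketch (Reconnectable and the names below are placeholders; the
    real client also layers retries and keytab relogin on top):

      import java.lang.reflect.InvocationHandler;
      import java.lang.reflect.Method;

      // Generic sketch: reconnect the wrapped client once its connection has
      // been open longer than lifetimeMs; 0 disables the check.
      class ConnectionLifetimeHandler implements InvocationHandler {
        interface Reconnectable { void reconnect() throws Exception; }

        private final Reconnectable base;  // must also implement the proxied interface
        private final long lifetimeMs;
        private long lastConnectTime = System.currentTimeMillis();

        ConnectionLifetimeHandler(Reconnectable base, long lifetimeMs) {
          this.base = base;
          this.lifetimeMs = lifetimeMs;
        }

        @Override
        public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
          boolean expired = lifetimeMs > 0
              && !method.getName().equalsIgnoreCase("close")
              && System.currentTimeMillis() - lastConnectTime >= lifetimeMs;
          if (expired) {
            base.reconnect();
            lastConnectTime = System.currentTimeMillis();
          }
          return method.invoke(base, args);  // delegate the actual call
        }
      }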
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10587 : ExprNodeColumnDesc should be created with isPartitionColOrVirtualCol true for DP column (Chaoyu Tang, reviewed by Ashutosh, via Szehon)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb3a665a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb3a665a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb3a665a

    Branch: refs/heads/llap
    Commit: bb3a665afa3f7b457085408e5789c462978a0b07
    Parents: 4aff07e
    Author: Szehon Ho <szehon@cloudera.com>
    Authored: Tue May 5 11:24:48 2015 -0700
    Committer: Szehon Ho <szehon@cloudera.com>
    Committed: Tue May 5 11:25:54 2015 -0700

    ----------------------------------------------------------------------
      ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
      1 file changed, 1 insertion(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/bb3a665a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    index dec0e38..cbc5466 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    @@ -6821,7 +6821,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
            for (int i = tableFields.size() + (updating() ? 1 : 0); i < rowFields.size(); ++i) {
              TypeInfo rowFieldTypeInfo = rowFields.get(i).getType();
              ExprNodeDesc column = new ExprNodeColumnDesc(
    - rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", false);
    + rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true);
              expressions.add(column);
            }
            // converted = true; // [TODO]: should we check & convert type to String and set it to true?
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10611: Mini tez tests wait for 5 minutes before shutting down (Vikram Dixit K, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/632a3090
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/632a3090
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/632a3090

    Branch: refs/heads/llap
    Commit: 632a30908d4680c0ee31ba04e4cfc9da3554e4f5
    Parents: 18fb460
    Author: vikram <vikram@hortonworks.com>
    Authored: Wed May 6 14:28:23 2015 -0700
    Committer: vikram <vikram@hortonworks.com>
    Committed: Wed May 6 14:30:14 2015 -0700

    ----------------------------------------------------------------------
      .../main/java/org/apache/hadoop/hive/ql/QTestUtil.java | 3 +++
      .../hadoop/hive/ql/exec/tez/TezSessionState.java | 13 +++++++------
      2 files changed, 10 insertions(+), 6 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/632a3090/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
    ----------------------------------------------------------------------
    diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
    index d1104b3..3e29d3c 100644
    --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
    +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
    @@ -430,6 +430,9 @@ public class QTestUtil {
            cleanUp();
          }

    + if (clusterType == MiniClusterType.tez) {
    + SessionState.get().getTezSession().close(false);
    + }
          setup.tearDown();
          if (sparkSession != null) {
            try {

    http://git-wip-us.apache.org/repos/asf/hive/blob/632a3090/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    index 89286e5..cef3303 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    @@ -249,13 +249,14 @@ public class TezSessionState {
        }

        /**
    - * Close a tez session. Will cleanup any tez/am related resources. After closing a session
    - * no further DAGs can be executed against it.
    - * @param keepTmpDir whether or not to remove the scratch dir at the same time.
    - * @throws IOException
    - * @throws TezException
    + * Close a tez session. Will cleanup any tez/am related resources. After closing a session no
    + * further DAGs can be executed against it.
    + *
    + * @param keepTmpDir
    + * whether or not to remove the scratch dir at the same time.
    + * @throws Exception
         */
    - public void close(boolean keepTmpDir) throws TezException, IOException {
    + public void close(boolean keepTmpDir) throws Exception {
          if (!isOpen()) {
            return;
          }
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-10239: Create scripts to do metastore upgrade tests on jenkins for Derby and PostgreSQL (Naveen Gangam, reviewed by Sergio Pena)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39972026
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39972026
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39972026

    Branch: refs/heads/llap
    Commit: 399720263d875897cdd31a3de2521872cc565eb7
    Parents: d39c829
    Author: Sergio Pena <sergio.pena@cloudera.com>
    Authored: Wed May 6 14:29:23 2015 -0500
    Committer: Sergio Pena <sergio.pena@cloudera.com>
    Committed: Wed May 6 14:29:23 2015 -0500

    ----------------------------------------------------------------------
      metastore/dbs/derby/execute.sh | 37 +++++++++++++++++
      metastore/dbs/derby/prepare.sh | 63 +++++++++++++++++++++++++++++
      metastore/dbs/postgres/execute.sh | 29 ++++++++++++++
      metastore/dbs/postgres/prepare.sh | 72 ++++++++++++++++++++++++++++++++++
      4 files changed, 201 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/39972026/metastore/dbs/derby/execute.sh
    ----------------------------------------------------------------------
    diff --git a/metastore/dbs/derby/execute.sh b/metastore/dbs/derby/execute.sh
    new file mode 100644
    index 0000000..d60f05b
    --- /dev/null
    +++ b/metastore/dbs/derby/execute.sh
    @@ -0,0 +1,37 @@
    +#!/bin/bash
    +
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements. See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License. You may obtain a copy of the License at
    +#
    +# http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# This script executes all hive metastore upgrade scripts on a specific
    +# database server in order to verify that the upgrade scripts are working
    +# properly.
    +
    +cd $(dirname $1)
    +
    +echo "####################################################"
    +echo "Executing script for Derby SQL: $1"
    +echo "####################################################"
    +
    +export DERBY_HOME=/usr/share/javadb
    +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
    +export PATH=$PATH:$DERBY_HOME/bin:$JAVA_HOME/bin
    +export CLASSPATH=$CLASSPATH:$DERBY_HOME/lib/derby.jar:$DERBY_HOME/lib/derbytools.jar:$DERBY_HOME/lib/derbyclient.jar
    +
    +echo "connect 'jdbc:derby:/tmp/hive_hms_testing;create=true';" > /tmp/derbyRun.sql
    +echo "run '$1';" >> /tmp/derbyRun.sql
    +echo "quit;" >> /tmp/derbyRun.sql
    +
    +ij /tmp/derbyRun.sql

    http://git-wip-us.apache.org/repos/asf/hive/blob/39972026/metastore/dbs/derby/prepare.sh
    ----------------------------------------------------------------------
    diff --git a/metastore/dbs/derby/prepare.sh b/metastore/dbs/derby/prepare.sh
    new file mode 100644
    index 0000000..fe4b2c3
    --- /dev/null
    +++ b/metastore/dbs/derby/prepare.sh
    @@ -0,0 +1,63 @@
    +#!/bin/bash
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements. See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License. You may obtain a copy of the License at
    +#
    +# http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# This script prepares a specific database server (Derby, in this case) so
    +# that the Hive metastore upgrade scripts can be verified against it.
    +
    +export DEBIAN_FRONTEND=noninteractive
    +OS_VERSION=`lsb_release -c | cut -d":" -f2`
    +
    +echo "####################################################"
    +echo "Begin for OS version $OS_VERSION"
    +echo "####################################################"
    +
    +HTTPS_INFO=($(dpkg -l apt-transport-https | grep ^i | tr -s ' '))
    +if [[ ${HTTPS_INFO[1]} == "apt-transport-https" ]]
    +then
    + echo "apt-transport-https package installed"
    +else
    + echo "apt-transport-https package not installed"
    + apt-get install -y --force-yes apt-transport-https
    +fi
    +
    +INSTALL_INFO=($(dpkg -l \*javadb-core\* | grep ^ii | tr -s ' '))
    +
    +if [[ ${INSTALL_INFO[1]} == "sun-javadb-core" ]]
    +then
    + echo "Derby already installed...Skipping"
    +else
    + echo "Derby not installed"
    + # Cleanup existing installation + configuration.
    + apt-get purge -y --force-yes derby-tools sun-javadb-client sun-javadb-core sun-javadb-common libderby-java openjdk-7-jre openjdk-7-jre-headless || /bin/true
    + echo "####################################################"
    + echo "Installing Derby dependencies:"
    + echo "####################################################"
    + apt-get update || /bin/true
    + apt-get install -y --force-yes -o Dpkg::Options::="--force-overwrite" sun-javadb-core sun-javadb-client derby-tools
    +fi
    +
    +export DERBY_HOME=/usr/share/javadb
    +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
    +export PATH=$PATH:/usr/share/javadb/bin:$JAVA_HOME/bin
    +export CLASSPATH=$CLASSPATH:$DERBY_HOME/lib/derby.jar:$DERBY_HOME/lib/derbytools.jar:$DERBY_HOME/lib/derbyclient.jar
    +rm -rf /tmp/hive_hms_testing;
    +
    +echo "connect 'jdbc:derby:/tmp/hive_hms_testing;create=true';" > /tmp/derbyInit.sql
    +ij /tmp/derbyInit.sql
    +
    +echo "DONE!!!"
    +

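    The two scripts are meant to be paired: prepare.sh once per server, then execute.sh per
    upgrade script. A hypothetical driver (not something this commit adds) could wire them
    together as below; the upgrade-script directory and the ".derby.sql" suffix are assumptions
    about the repository layout.

    import java.io.File;
    import java.util.Arrays;

    public class RunDerbyUpgrades {
      public static void main(String[] args) throws Exception {
        run("metastore/dbs/derby/prepare.sh");
        // Directory and suffix below are illustrative assumptions.
        File[] scripts = new File("metastore/scripts/upgrade/derby")
            .listFiles((dir, name) -> name.endsWith(".derby.sql"));
        if (scripts != null) {
          Arrays.sort(scripts);
          for (File script : scripts) {
            run("metastore/dbs/derby/execute.sh", script.getAbsolutePath());
          }
        }
      }

      private static void run(String... cmd) throws Exception {
        // Fail fast if any script exits non-zero.
        Process p = new ProcessBuilder(cmd).inheritIO().start();
        if (p.waitFor() != 0) {
          throw new RuntimeException("Command failed: " + String.join(" ", cmd));
        }
      }
    }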
    http://git-wip-us.apache.org/repos/asf/hive/blob/39972026/metastore/dbs/postgres/execute.sh
    ----------------------------------------------------------------------
    diff --git a/metastore/dbs/postgres/execute.sh b/metastore/dbs/postgres/execute.sh
    new file mode 100644
    index 0000000..cabcae4
    --- /dev/null
    +++ b/metastore/dbs/postgres/execute.sh
    @@ -0,0 +1,29 @@
    +#!/bin/bash
    +
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements. See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License. You may obtain a copy of the License at
    +#
    +# http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# This script executes a Hive metastore upgrade script on a specific
    +# database server in order to verify that the upgrade scripts work
    +# properly.
    +
    +cd "$(dirname "$1")"
    +
    +echo "####################################################"
    +echo "Executing script for PostgreSQL: $1"
    +echo "####################################################"
    +
    +export PGPASSWORD=hivepw
    +psql -h localhost -U hiveuser -d hive_hms_testing -f $1

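    As with the Derby variant, a small verification step could follow the upgrade run. The sketch
    below is illustrative (not part of the commit); it reuses the credentials and database name
    that execute.sh itself uses (hiveuser/hivepw, hive_hms_testing) and assumes the PostgreSQL
    JDBC driver is on the classpath.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class PostgresUpgradeCheck {
      public static void main(String[] args) throws Exception {
        // Same connection details as execute.sh.
        String url = "jdbc:postgresql://localhost/hive_hms_testing";
        try (Connection conn = DriverManager.getConnection(url, "hiveuser", "hivepw");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                 "SELECT count(*) FROM information_schema.tables WHERE table_schema = 'public'")) {
          rs.next();
          System.out.println("Tables in hive_hms_testing: " + rs.getLong(1));
        }
      }
    }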
    http://git-wip-us.apache.org/repos/asf/hive/blob/39972026/metastore/dbs/postgres/prepare.sh
    ----------------------------------------------------------------------
    diff --git a/metastore/dbs/postgres/prepare.sh b/metastore/dbs/postgres/prepare.sh
    new file mode 100644
    index 0000000..2036354
    --- /dev/null
    +++ b/metastore/dbs/postgres/prepare.sh
    @@ -0,0 +1,72 @@
    +#!/bin/bash
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements. See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License. You may obtain a copy of the License at
    +#
    +# http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +# This script prepares a specific database server (PostgreSQL, in this case)
    +# so that the Hive metastore upgrade scripts can be verified against it.
    +
    +export DEBIAN_FRONTEND=noninteractive
    +OS_VERSION=`lsb_release -c | cut -d":" -f2`
    +echo "$OS_VERSION"
    +
    +echo "####################################################"
    +echo "Begin for OS version $OS_VERSION"
    +echo "####################################################"
    +
    +HTTPS_INFO=($(dpkg -l apt-transport-https | grep ^i | tr -s ' '))
    +if [[ ${HTTPS_INFO[1]} == "apt-transport-https" ]]
    +then
    + echo "apt-transport-https package installed"
    +else
    + echo "apt-transport-https package not installed"
    + apt-get install -y --force-yes apt-transport-https
    +fi
    +
    +INSTALL_INFO=($(dpkg -l postgresql-9.4\* | grep ^i | tr -s ' '))
    +
    +if [[ ${INSTALL_INFO[1]} == "postgresql-9.4" ]]
    +then
    + echo "PostgreSQL already installed...Skipping"
    +else
    + echo "PostgreSQL not installed"
    + # Cleanup existing installation + configuration.
    + apt-get purge -y --force-yes postgresql-9.4 || /bin/true
    + echo "####################################################"
    + echo "Installing PostgreSQL dependencies:"
    + echo "####################################################"
    + if grep -q "deb http://apt.postgresql.org/pub/repos/apt/ $OS_VERSION-pgdg main" /etc/apt/sources.list.d/postgreSQL.list
    + then
    + echo "Sources already listed"
    + else
    + echo "deb http://apt.postgresql.org/pub/repos/apt/ $OS_VERSION-pgdg main" >> /etc/apt/sources.list.d/postgreSQL.list
    + fi
    +
    + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
    + apt-get update || /bin/true
    + apt-get install -y --force-yes postgresql-9.4
    +fi
    +
    +echo "####################################################"
    +echo "Configuring PostgreSQL Environment:"
    +echo "####################################################"
    +echo "drop database if exists hive_hms_testing;" > /tmp/postgresInit.sql
    +echo "drop user if exists hiveuser;" >> /tmp/postgresInit.sql
    +echo "create user hiveuser createdb createuser password 'hivepw';" >> /tmp/postgresInit.sql
    +echo "create database hive_hms_testing owner hiveuser;" >> /tmp/postgresInit.sql
    +sudo -u postgres psql -f /tmp/postgresInit.sql
    +
    +echo "DONE!!!"
    +
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-9845 : HCatSplit repeats information making input split data size huge (Mithun Radhakrishnan via Sushanth Sowmyan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/18fb4601
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/18fb4601
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/18fb4601

    Branch: refs/heads/llap
    Commit: 18fb460179ff48d2c1e65f324799b4315616f14b
    Parents: dc72c87
    Author: Sushanth Sowmyan <khorgath@gmail.com>
    Authored: Wed May 6 14:03:37 2015 -0700
    Committer: Sushanth Sowmyan <khorgath@gmail.com>
    Committed: Wed May 6 14:04:32 2015 -0700

    ----------------------------------------------------------------------
      .../hcatalog/mapreduce/HCatBaseInputFormat.java | 20 ++--
      .../hive/hcatalog/mapreduce/HCatSplit.java | 21 +---
      .../hive/hcatalog/mapreduce/HCatTableInfo.java | 12 ++
      .../hive/hcatalog/mapreduce/InputJobInfo.java | 5 +
      .../hive/hcatalog/mapreduce/PartInfo.java | 117 +++++++++++++++++--
      .../mapreduce/TestHCatOutputFormat.java | 5 +-
      6 files changed, 139 insertions(+), 41 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
    index 55b97dd..adfaf4e 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
    @@ -130,16 +130,6 @@ public abstract class HCatBaseInputFormat
            setInputPath(jobConf, partitionInfo.getLocation());
            Map<String, String> jobProperties = partitionInfo.getJobProperties();

    - HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
    - for (HCatFieldSchema field :
    - inputJobInfo.getTableInfo().getDataColumns().getFields()) {
    - allCols.append(field);
    - }
    - for (HCatFieldSchema field :
    - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
    - allCols.append(field);
    - }
    -
            HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);

            storageHandler = HCatUtil.getStorageHandler(
    @@ -163,9 +153,7 @@ public abstract class HCatBaseInputFormat
              inputFormat.getSplits(jobConf, desiredNumSplits);

            for (org.apache.hadoop.mapred.InputSplit split : baseSplits) {
    - splits.add(new HCatSplit(
    - partitionInfo,
    - split, allCols));
    + splits.add(new HCatSplit(partitionInfo, split));
            }
          }

    @@ -190,6 +178,12 @@ public abstract class HCatBaseInputFormat

          HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split);
          PartInfo partitionInfo = hcatSplit.getPartitionInfo();
    + // Ensure PartInfo's TableInfo is initialized.
    + if (partitionInfo.getTableInfo() == null) {
    + partitionInfo.setTableInfo(((InputJobInfo)HCatUtil.deserialize(
    + taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)
    + )).getTableInfo());
    + }
          JobContext jobContext = taskContext;
          Configuration conf = jobContext.getConfiguration();


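    The HCAT_KEY_JOB_INFO lookup above relies on job-level state having been stashed in the
    Configuration as a serialized string, so that a task can lazily rebuild a transient field.
    A minimal sketch of that idiom follows; ConfSerde is a hypothetical helper, not the actual
    HCatUtil serialize/deserialize API.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.ObjectInputStream;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;
    import java.util.Base64;

    import org.apache.hadoop.conf.Configuration;

    final class ConfSerde {
      // Store a Serializable payload under a Configuration key as Base64 text.
      static void put(Configuration conf, String key, Serializable value) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
          oos.writeObject(value);
        }
        conf.set(key, Base64.getEncoder().encodeToString(bos.toByteArray()));
      }

      // Rebuild the payload on the task side, e.g. to repopulate a transient field.
      static Object get(Configuration conf, String key) throws IOException, ClassNotFoundException {
        byte[] bytes = Base64.getDecoder().decode(conf.get(key));
        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
          return ois.readObject();
        }
      }
    }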
    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java
    index bcedb3a..0aa498a 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java
    @@ -24,7 +24,6 @@ import java.io.IOException;
      import java.lang.reflect.Constructor;

      import org.apache.hadoop.hive.common.JavaUtils;
    -import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.io.Writable;
      import org.apache.hadoop.io.WritableUtils;
      import org.apache.hadoop.mapreduce.InputSplit;
    @@ -44,11 +43,6 @@ public class HCatSplit extends InputSplit
        /** The split returned by the underlying InputFormat split. */
        private org.apache.hadoop.mapred.InputSplit baseMapRedSplit;

    - /** The schema for the HCatTable */
    - private HCatSchema tableSchema;
    -
    - private HiveConf hiveConf;
    -
        /**
         * Instantiates a new hcat split.
         */
    @@ -60,16 +54,13 @@ public class HCatSplit extends InputSplit
         *
         * @param partitionInfo the partition info
         * @param baseMapRedSplit the base mapred split
    - * @param tableSchema the table level schema
         */
        public HCatSplit(PartInfo partitionInfo,
    - org.apache.hadoop.mapred.InputSplit baseMapRedSplit,
    - HCatSchema tableSchema) {
    + org.apache.hadoop.mapred.InputSplit baseMapRedSplit) {

          this.partitionInfo = partitionInfo;
          // dataSchema can be obtained from partitionInfo.getPartitionSchema()
          this.baseMapRedSplit = baseMapRedSplit;
    - this.tableSchema = tableSchema;
        }

        /**
    @@ -101,7 +92,8 @@ public class HCatSplit extends InputSplit
         * @return the table schema
         */
        public HCatSchema getTableSchema() {
    - return this.tableSchema;
    + assert this.partitionInfo.getTableInfo() != null : "TableInfo should have been set at this point.";
    + return this.partitionInfo.getTableInfo().getAllColumns();
        }

        /* (non-Javadoc)
    @@ -159,9 +151,6 @@ public class HCatSplit extends InputSplit
          } catch (Exception e) {
            throw new IOException("Exception from " + baseSplitClassName, e);
          }
    -
    - String tableSchemaString = WritableUtils.readString(input);
    - tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString);
        }

        /* (non-Javadoc)
    @@ -178,10 +167,6 @@ public class HCatSplit extends InputSplit
          Writable baseSplitWritable = (Writable) baseMapRedSplit;
          //write baseSplit into output
          baseSplitWritable.write(output);
    -
    - //write the table schema into output
    - String tableSchemaString = HCatUtil.serialize(tableSchema);
    - WritableUtils.writeString(output, tableSchemaString);
        }

      }

    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java
    index 13faf15..14c93ab 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java
    @@ -21,10 +21,13 @@ package org.apache.hive.hcatalog.mapreduce;

      import java.io.IOException;
      import java.io.Serializable;
    +import java.util.List;

    +import com.google.common.collect.Lists;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.metastore.api.Table;
      import org.apache.hive.hcatalog.common.HCatUtil;
    +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
      import org.apache.hive.hcatalog.data.schema.HCatSchema;

      /**
    @@ -112,6 +115,15 @@ public class HCatTableInfo implements Serializable {
        }

        /**
    + * @return HCatSchema with all columns (i.e. data and partition columns).
    + */
    + public HCatSchema getAllColumns() {
    + List<HCatFieldSchema> allColumns = Lists.newArrayList(dataColumns.getFields());
    + allColumns.addAll(partitionColumns.getFields());
    + return new HCatSchema(allColumns);
    + }
    +
    + /**
         * @return the storerInfo
         */
        public StorerInfo getStorerInfo() {

    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
    index 360e77b..1f23f3f 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
    @@ -182,5 +182,10 @@ public class InputJobInfo implements Serializable {
          ObjectInputStream partInfoReader =
            new ObjectInputStream(new InflaterInputStream(ois));
          partitions = (List<PartInfo>)partInfoReader.readObject();
    + for (PartInfo partInfo : partitions) {
    + if (partInfo.getTableInfo() == null) {
    + partInfo.setTableInfo(this.tableInfo);
    + }
    + }
        }
      }

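    The readObject hook added above re-attaches the shared, transient table info to each
    deserialized partition. A self-contained sketch of the same idiom, with illustrative names
    rather than the real HCatalog classes:

    import java.io.IOException;
    import java.io.ObjectInputStream;
    import java.io.Serializable;
    import java.util.List;

    class JobInfoSketch implements Serializable {
      private static final long serialVersionUID = 1L;
      private final String tableName;        // shared state, serialized once here
      private final List<PartSketch> partitions;

      JobInfoSketch(String tableName, List<PartSketch> partitions) {
        this.tableName = tableName;
        this.partitions = partitions;
      }

      private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
        ois.defaultReadObject();
        // Hand the shared reference back to each element after deserialization.
        for (PartSketch p : partitions) {
          if (p.getTableName() == null) {
            p.setTableName(tableName);
          }
        }
      }
    }

    class PartSketch implements Serializable {
      private static final long serialVersionUID = 1L;
      private transient String tableName;    // deliberately not written per partition

      String getTableName() { return tableName; }
      void setTableName(String t) { this.tableName = t; }
    }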
    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java
    index 651a9a0..fca0a92 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java
    @@ -18,27 +18,32 @@
       */
      package org.apache.hive.hcatalog.mapreduce;

    +import java.io.IOException;
    +import java.io.ObjectOutputStream;
      import java.io.Serializable;
      import java.util.Map;
      import java.util.Properties;

      import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
      import org.apache.hive.hcatalog.data.schema.HCatSchema;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;

      /** The Class used to serialize the partition information read from the metadata server that maps to a partition. */
      public class PartInfo implements Serializable {

    + private static Logger LOG = LoggerFactory.getLogger(PartInfo.class);
        /** The serialization version */
        private static final long serialVersionUID = 1L;

    - /** The partition schema. */
    - private final HCatSchema partitionSchema;
    + /** The partition data-schema. */
    + private HCatSchema partitionSchema;

        /** The information about which input storage handler to use */
    - private final String storageHandlerClassName;
    - private final String inputFormatClassName;
    - private final String outputFormatClassName;
    - private final String serdeClassName;
    + private String storageHandlerClassName;
    + private String inputFormatClassName;
    + private String outputFormatClassName;
    + private String serdeClassName;

        /** HCat-specific properties set at the partition */
        private final Properties hcatProperties;
    @@ -52,8 +57,11 @@ public class PartInfo implements Serializable {
      /** Job properties associated with this partition */
        Map<String, String> jobProperties;

    - /** the table info associated with this partition */
    - HCatTableInfo tableInfo;
    + /**
    + * The table info associated with this partition.
    + * Not serialized with each PartInfo instance, since it is constant per table.
    + */
    + transient HCatTableInfo tableInfo;

        /**
         * Instantiates a new hcat partition info.
    @@ -162,4 +170,97 @@ public class PartInfo implements Serializable {
        public HCatTableInfo getTableInfo() {
          return tableInfo;
        }
    +
    + void setTableInfo(HCatTableInfo thatTableInfo) {
    + this.tableInfo = thatTableInfo;
    +
    + if (partitionSchema == null) {
    + partitionSchema = tableInfo.getDataColumns();
    + }
    +
    + if (storageHandlerClassName == null) {
    + storageHandlerClassName = tableInfo.getStorerInfo().getStorageHandlerClass();
    + }
    +
    + if (inputFormatClassName == null) {
    + inputFormatClassName = tableInfo.getStorerInfo().getIfClass();
    + }
    +
    + if (outputFormatClassName == null) {
    + outputFormatClassName = tableInfo.getStorerInfo().getOfClass();
    + }
    +
    + if (serdeClassName == null) {
    + serdeClassName = tableInfo.getStorerInfo().getSerdeClass();
    + }
    + }
    +
    + /**
    + * Serialization method. Suppresses serialization of redundant information that's already
    + * available from TableInfo.
    + */
    + private void writeObject(ObjectOutputStream oos)
    + throws IOException {
    + // Suppress commonality with TableInfo.
    +
    + assert tableInfo != null : "TableInfo can't be null at this point.";
    +
    + if (partitionSchema != null) {
    + if (partitionSchema.equals(tableInfo.getDataColumns())) {
    + partitionSchema = null;
    + } else {
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Can't suppress data-schema. Partition-schema and table-schema seem to differ! "
    + + " partitionSchema: " + partitionSchema.getFields()
    + + " tableSchema: " + tableInfo.getDataColumns());
    + }
    + }
    + }
    +
    + if (storageHandlerClassName != null) {
    + if (storageHandlerClassName.equals(tableInfo.getStorerInfo().getStorageHandlerClass())) {
    + storageHandlerClassName = null;
    + } else {
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Partition's storageHandler (" + storageHandlerClassName + ") " +
    + "differs from table's storageHandler (" + tableInfo.getStorerInfo().getStorageHandlerClass() + ").");
    + }
    + }
    + }
    +
    + if (inputFormatClassName != null) {
    + if (inputFormatClassName.equals(tableInfo.getStorerInfo().getIfClass())) {
    + inputFormatClassName = null;
    + } else {
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Partition's InputFormat (" + inputFormatClassName + ") " +
    + "differs from table's InputFormat (" + tableInfo.getStorerInfo().getIfClass() + ").");
    + }
    + }
    + }
    +
    + if (outputFormatClassName != null) {
    + if (outputFormatClassName.equals(tableInfo.getStorerInfo().getOfClass())) {
    + outputFormatClassName = null;
    + } else {
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Partition's OutputFormat (" + outputFormatClassName + ") " +
    + "differs from table's OutputFormat (" + tableInfo.getStorerInfo().getOfClass() + ").");
    + }
    + }
    + }
    +
    + if (serdeClassName != null) {
    + if (serdeClassName.equals(tableInfo.getStorerInfo().getSerdeClass())) {
    + serdeClassName = null;
    + } else {
    + if (LOG.isDebugEnabled()) {
    + LOG.debug("Partition's SerDe (" + serdeClassName + ") " +
    + "differs from table's SerDe (" + tableInfo.getStorerInfo().getSerdeClass() + ").");
    + }
    + }
    + }
    +
    + oos.defaultWriteObject();
    + }
      }

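    The writeObject above is the space-saving idiom at the heart of HIVE-9845: any per-partition
    field whose value merely duplicates the table-level value is nulled out before default
    serialization runs, and setTableInfo() backfills it after deserialization. A minimal sketch of
    the pattern, with illustrative names (one field stands in for the five the commit handles):

    import java.io.IOException;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;

    class PartInfoSketch implements Serializable {
      private static final long serialVersionUID = 1L;
      private String inputFormat;                     // may duplicate the table default
      private transient String tableDefaultInputFormat;

      void setTableDefault(String tableDefault) {
        this.tableDefaultInputFormat = tableDefault;
        if (inputFormat == null) {
          inputFormat = tableDefault;                 // backfill after deserialization
        }
      }

      private void writeObject(ObjectOutputStream oos) throws IOException {
        // Suppress the duplicate before default serialization runs.
        if (inputFormat != null && inputFormat.equals(tableDefaultInputFormat)) {
          inputFormat = null;
        }
        oos.defaultWriteObject();
      }
    }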
    http://git-wip-us.apache.org/repos/asf/hive/blob/18fb4601/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
    index add9d41..f716da9 100644
    --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
    +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java
    @@ -25,6 +25,7 @@ import java.util.HashMap;
      import java.util.List;
      import java.util.Map;

    +import com.google.common.collect.Lists;
      import junit.framework.TestCase;

      import org.apache.hadoop.conf.Configuration;
    @@ -106,7 +107,7 @@ public class TestHCatOutputFormat extends TestCase {
          tbl.setDbName(dbName);
          tbl.setTableName(tblName);
          StorageDescriptor sd = new StorageDescriptor();
    - sd.setCols(fields);
    + sd.setCols(Lists.newArrayList(new FieldSchema("data_column", serdeConstants.STRING_TYPE_NAME, "")));
          tbl.setSd(sd);

          //sd.setLocation("hdfs://tmp");
    @@ -151,7 +152,7 @@ public class TestHCatOutputFormat extends TestCase {
          assertEquals(1, jobInfo.getPartitionValues().size());
          assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
          assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
    - assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());
    + assertEquals("data_column", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());

          publishTest(job);
        }
  • Prasanthj at May 7, 2015 at 1:20 am
    Merge branch 'ci8915'


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f895b277
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f895b277
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f895b277

    Branch: refs/heads/llap
    Commit: f895b277399fc37cdcd349a321ddf12c49756a5c
    Parents: 1988615 c40c6de
    Author: Alan Gates <gates@hortonworks.com>
    Authored: Tue May 5 09:12:15 2015 -0700
    Committer: Alan Gates <gates@hortonworks.com>
    Committed: Tue May 5 09:12:15 2015 -0700

    ----------------------------------------------------------------------
      .../hadoop/hive/ql/txn/compactor/Cleaner.java | 18 ++++++++----
      .../hadoop/hive/ql/txn/compactor/Worker.java | 30 ++++++++++++++------
      2 files changed, 33 insertions(+), 15 deletions(-)
    ----------------------------------------------------------------------
  • Prasanthj at May 7, 2015 at 1:20 am
    HIVE-9582 : HCatalog should use IMetaStoreClient interface (Thiruvel Thirumoolan, reviewed by Sushanth Sowmyan, Thejas Nair)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/45307c10
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/45307c10
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/45307c10

    Branch: refs/heads/llap
    Commit: 45307c10e472e7dd42b28310f9adf7afe27bf6d7
    Parents: c011673
    Author: Sushanth Sowmyan <khorgath@gmail.com>
    Authored: Wed May 6 02:32:06 2015 -0700
    Committer: Sushanth Sowmyan <khorgath@gmail.com>
    Committed: Wed May 6 02:34:23 2015 -0700

    ----------------------------------------------------------------------
      .../apache/hive/hcatalog/common/HCatUtil.java | 37 ++++++---
      .../hive/hcatalog/common/HiveClientCache.java | 85 +++++++++++++-------
      .../DefaultOutputCommitterContainer.java | 6 +-
      .../mapreduce/FileOutputCommitterContainer.java | 14 ++--
      .../mapreduce/FileOutputFormatContainer.java | 8 +-
      .../hcatalog/mapreduce/HCatOutputFormat.java | 6 +-
      .../hcatalog/mapreduce/InitializeInput.java | 6 +-
      .../hive/hcatalog/mapreduce/Security.java | 10 +--
      .../hcatalog/common/TestHiveClientCache.java | 37 +++++----
      .../hcatalog/mapreduce/HCatMapReduceTest.java | 2 +-
      .../hcatalog/mapreduce/TestPassProperties.java | 2 +-
      .../apache/hive/hcatalog/pig/PigHCatUtil.java | 10 +--
      .../streaming/AbstractRecordWriter.java | 11 ++-
      .../hive/hcatalog/streaming/HiveEndPoint.java | 9 ++-
      .../hive/hcatalog/api/HCatClientHMSImpl.java | 17 ++--
      .../hcatalog/templeton/CompleteDelegator.java | 6 +-
      .../hcatalog/templeton/SecureProxySupport.java | 9 ++-
      .../templeton/tool/TempletonControllerJob.java | 7 +-
      .../hadoop/hive/metastore/IMetaStoreClient.java | 3 +
      19 files changed, 173 insertions(+), 112 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
    index 63909b8..3ee30ed 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
    @@ -38,9 +38,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
      import org.apache.hadoop.classification.InterfaceStability;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.permission.FsAction;
    -import org.apache.hadoop.hive.common.JavaUtils;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
      import org.apache.hadoop.hive.metastore.api.MetaException;
    @@ -175,7 +175,7 @@ public class HCatUtil {
          }
        }

    - public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName)
    + public static Table getTable(IMetaStoreClient client, String dbName, String tableName)
          throws NoSuchObjectException, TException, MetaException {
          return new Table(client.getTable(dbName, tableName));
        }
    @@ -538,17 +538,17 @@ public class HCatUtil {
         * @throws MetaException When HiveMetaStoreClient couldn't be created
         * @throws IOException
         */
    - public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf)
    - throws MetaException, IOException {
    + public static IMetaStoreClient getHiveMetastoreClient(HiveConf hiveConf)
    + throws MetaException, IOException {

          if (hiveConf.getBoolean(HCatConstants.HCAT_HIVE_CLIENT_DISABLE_CACHE, false)){
            // If cache is disabled, don't use it.
    - return HiveClientCache.getNonCachedHiveClient(hiveConf);
    + return HiveClientCache.getNonCachedHiveMetastoreClient(hiveConf);
          }

          // Singleton behaviour: create the cache instance if required.
          if (hiveClientCache == null) {
    - synchronized (HiveMetaStoreClient.class) {
    + synchronized (IMetaStoreClient.class) {
              if (hiveClientCache == null) {
                hiveClientCache = new HiveClientCache(hiveConf);
              }
    @@ -561,11 +561,30 @@ public class HCatUtil {
          }
        }

    - private static HiveMetaStoreClient getNonCachedHiveClient(HiveConf hiveConf) throws MetaException{
    - return new HiveMetaStoreClient(hiveConf);
    + /**
    + * Get or create a hive client depending on whether it exists in the cache or not.
    + * @Deprecated : use {@link #getHiveMetastoreClient(HiveConf)} instead.
    + * This was deprecated in Hive 1.2 and is slated for removal in two versions:
    + * 1.2 and the projected 1.3 will have it, but it will be removed after that.
    + * @param hiveConf The hive configuration
    + * @return the client
    + * @throws MetaException When HiveMetaStoreClient couldn't be created
    + * @throws IOException
    + */
    + @Deprecated
    + public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) throws MetaException, IOException {
    + IMetaStoreClient imsc = getHiveMetastoreClient(hiveConf);
    + // Try piggybacking on the function that returns IMSC. Current implementation of the IMSC cache
    + // has CacheableMetaStoreClients, which are HMSC, so we can return them as-is. If not, it's okay
    + // for us to ignore the caching aspect and return a vanilla HMSC.
    + if (imsc instanceof HiveMetaStoreClient){
    + return (HiveMetaStoreClient)imsc;
    + } else {
    + return new HiveMetaStoreClient(hiveConf);
    + }
        }

    - public static void closeHiveClientQuietly(HiveMetaStoreClient client) {
    + public static void closeHiveClientQuietly(IMetaStoreClient client) {
          try {
            if (client != null)
              client.close();

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java
    index a001252..578b6ea 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java
    @@ -34,6 +34,7 @@ import org.apache.commons.lang.builder.EqualsBuilder;
      import org.apache.commons.lang.builder.HashCodeBuilder;
      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.MetaException;
      import org.apache.hadoop.hive.shims.ShimLoader;
      import org.apache.hadoop.hive.shims.Utils;
    @@ -54,7 +55,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
      class HiveClientCache {
        public final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60;

    - final private Cache<HiveClientCacheKey, CacheableHiveMetaStoreClient> hiveCache;
    + final private Cache<HiveClientCacheKey, ICacheableMetaStoreClient> hiveCache;
        private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class);
        private final int timeout;
        // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get()
    @@ -79,7 +80,7 @@ class HiveClientCache {
          return threadId.get();
        }

    - public static HiveMetaStoreClient getNonCachedHiveClient(HiveConf hiveConf) throws MetaException {
    + public static IMetaStoreClient getNonCachedHiveMetastoreClient(HiveConf hiveConf) throws MetaException {
          return new HiveMetaStoreClient(hiveConf);
        }

    @@ -92,11 +93,11 @@ class HiveClientCache {
         */
        public HiveClientCache(final int timeout) {
          this.timeout = timeout;
    - RemovalListener<HiveClientCacheKey, CacheableHiveMetaStoreClient> removalListener =
    - new RemovalListener<HiveClientCacheKey, CacheableHiveMetaStoreClient>() {
    + RemovalListener<HiveClientCacheKey, ICacheableMetaStoreClient> removalListener =
    + new RemovalListener<HiveClientCacheKey, ICacheableMetaStoreClient>() {
              @Override
    - public void onRemoval(RemovalNotification<HiveClientCacheKey, CacheableHiveMetaStoreClient> notification) {
    - CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue();
    + public void onRemoval(RemovalNotification<HiveClientCacheKey, ICacheableMetaStoreClient> notification) {
    + ICacheableMetaStoreClient hiveMetaStoreClient = notification.getValue();
                if (hiveMetaStoreClient != null) {
                  synchronized (CACHE_TEARDOWN_LOCK) {
                    hiveMetaStoreClient.setExpiredFromCache();
    @@ -169,8 +170,8 @@ class HiveClientCache {
         */
        void closeAllClientsQuietly() {
          try {
    - ConcurrentMap<HiveClientCacheKey, CacheableHiveMetaStoreClient> elements = hiveCache.asMap();
    - for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) {
    + ConcurrentMap<HiveClientCacheKey, ICacheableMetaStoreClient> elements = hiveCache.asMap();
    + for (ICacheableMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) {
              cacheableHiveMetaStoreClient.tearDown();
            }
          } catch (Exception e) {
    @@ -191,24 +192,24 @@ class HiveClientCache {
         * @throws IOException
         * @throws LoginException
         */
    - public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException {
    + public ICacheableMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException {
          final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId());
    - CacheableHiveMetaStoreClient hiveMetaStoreClient = null;
    + ICacheableMetaStoreClient cacheableHiveMetaStoreClient = null;
          // The HMSC is not shared across threads, so the only way it could get closed while we are doing a health check
          // is if removalListener closes it. The synchronization ensures that removalListener won't do that.
          synchronized (CACHE_TEARDOWN_LOCK) {
    - hiveMetaStoreClient = getOrCreate(cacheKey);
    - hiveMetaStoreClient.acquire();
    + cacheableHiveMetaStoreClient = getOrCreate(cacheKey);
    + cacheableHiveMetaStoreClient.acquire();
          }
    - if (!hiveMetaStoreClient.isOpen()) {
    + if (!cacheableHiveMetaStoreClient.isOpen()) {
            synchronized (CACHE_TEARDOWN_LOCK) {
              hiveCache.invalidate(cacheKey);
    - hiveMetaStoreClient.close();
    - hiveMetaStoreClient = getOrCreate(cacheKey);
    - hiveMetaStoreClient.acquire();
    + cacheableHiveMetaStoreClient.close();
    + cacheableHiveMetaStoreClient = getOrCreate(cacheKey);
    + cacheableHiveMetaStoreClient.acquire();
            }
          }
    - return hiveMetaStoreClient;
    + return cacheableHiveMetaStoreClient;
        }

        /**
    @@ -219,11 +220,12 @@ class HiveClientCache {
         * @throws MetaException
         * @throws LoginException
         */
    - private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException {
    + private ICacheableMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey)
    + throws IOException, MetaException, LoginException {
          try {
    - return hiveCache.get(cacheKey, new Callable<CacheableHiveMetaStoreClient>() {
    + return hiveCache.get(cacheKey, new Callable<ICacheableMetaStoreClient>() {
              @Override
    - public CacheableHiveMetaStoreClient call() throws MetaException {
    + public ICacheableMetaStoreClient call() throws MetaException {
                return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout);
              }
            });
    @@ -289,28 +291,48 @@ class HiveClientCache {
          }
        }

    + public interface ICacheableMetaStoreClient extends IMetaStoreClient {
    +
    + void acquire();
    +
    + void release();
    +
    + void setExpiredFromCache();
    +
    + AtomicInteger getUsers();
    +
    + boolean isClosed();
    +
    + boolean isOpen();
    +
    + void tearDownIfUnused();
    +
    + void tearDown();
    + }
    +
        /**
         * Tracks the number of current users of a HiveMetaStoreClient, so that the client can be cleaned up when no one is using it.
         */
    - public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient {
    + static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient implements ICacheableMetaStoreClient {
    +
          private final AtomicInteger users = new AtomicInteger(0);
          private volatile boolean expiredFromCache = false;
          private boolean isClosed = false;
          private final long expiryTime;
          private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000;

    - public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException {
    + CacheableHiveMetaStoreClient(final HiveConf conf, final Integer timeout) throws MetaException {
            super(conf);
            // Extend the expiry time with some extra time on top of guava expiry time to make sure
            // that items closed() are for sure expired and would never be returned by guava.
            this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS;
          }

    - private void acquire() {
    + public void acquire() {
            users.incrementAndGet();
          }

    - private void release() {
    + public void release() {
            users.decrementAndGet();
          }

    @@ -322,15 +344,22 @@ class HiveClientCache {
            return isClosed;
          }

    + /*
    + * Used only for debugging or testing purposes.
    + */
    + public AtomicInteger getUsers() {
    + return users;
    + }
    +
          /**
           * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides
           * invalid data renders the client unusable for future use (example: create a table with very long table name)
           * @return
           */
    - protected boolean isOpen() {
    + public boolean isOpen() {
            try {
              // Look for an unlikely database name and see if either MetaException or TException is thrown
    - this.getDatabases("NonExistentDatabaseUsedForHealthCheck");
    + super.getDatabases("NonExistentDatabaseUsedForHealthCheck");
            } catch (TException e) {
              return false;
            }
    @@ -354,7 +383,7 @@ class HiveClientCache {
           * 1. There are no active users
           * 2. It has expired from the cache
           */
    - private void tearDownIfUnused() {
    + public void tearDownIfUnused() {
            if (users.get() == 0 && expiredFromCache) {
              this.tearDown();
            }
    @@ -363,7 +392,7 @@ class HiveClientCache {
          /**
           * Close if not closed already
           */
    - protected synchronized void tearDown() {
    + public synchronized void tearDown() {
            try {
              if (!isClosed) {
                super.close();

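    The interface extraction above keeps the cache's reference-counting contract explicit: a
    client may be torn down only once it has both expired from the cache and lost its last user.
    A minimal sketch of that idiom (illustrative, not the real HCatalog classes):

    import java.util.concurrent.atomic.AtomicInteger;

    class RefCountedClient {
      private final AtomicInteger users = new AtomicInteger(0);
      private volatile boolean expiredFromCache = false;
      private boolean closed = false;

      void acquire() { users.incrementAndGet(); }

      void release() {
        users.decrementAndGet();
        tearDownIfUnused();
      }

      // Called by the cache's removal listener when the entry expires.
      void setExpiredFromCache() {
        expiredFromCache = true;
        tearDownIfUnused();
      }

      synchronized void tearDownIfUnused() {
        // Tear down only when both conditions hold, so neither an expired-but-busy
        // client nor an idle-but-cached client is closed prematurely.
        if (users.get() == 0 && expiredFromCache && !closed) {
          closed = true;
          // close the underlying metastore connection here
        }
      }
    }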
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
    index cead40d..90c2d71 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
    @@ -22,7 +22,7 @@ package org.apache.hive.hcatalog.mapreduce;
      import java.io.IOException;

      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.mapreduce.JobContext;
      import org.apache.hadoop.mapreduce.JobStatus.State;
      import org.apache.hadoop.mapreduce.TaskAttemptContext;
    @@ -90,10 +90,10 @@ class DefaultOutputCommitterContainer extends OutputCommitterContainer {
          getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));

          //Cancel HCat and JobTracker tokens
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          try {
            HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            String tokenStrForm = client.getTokenStrForm();
            if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
              client.cancelDelegationToken(tokenStrForm);

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
    index 8146d85..367f4ea 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
    @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.permission.FsPermission;
      import org.apache.hadoop.hive.common.FileUtils;
      import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
      import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
    @@ -43,7 +44,6 @@ import org.apache.hadoop.hive.metastore.api.Partition;
      import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
      import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
      import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.Warehouse;
      import org.apache.hadoop.hive.ql.metadata.Table;
      import org.apache.hadoop.hive.shims.ShimLoader;
    @@ -466,7 +466,7 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
         * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception
         * @throws org.apache.thrift.TException the t exception
         */
    - private void updateTableSchema(HiveMetaStoreClient client, Table table,
    + private void updateTableSchema(IMetaStoreClient client, Table table,
                         HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException {


    @@ -775,12 +775,12 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
            return;
          }

    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          HCatTableInfo tableInfo = jobInfo.getTableInfo();
          List<Partition> partitionsAdded = new ArrayList<Partition>();
          try {
            HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters());

            FileStatus tblStat = fs.getFileStatus(tblPath);
    @@ -952,7 +952,7 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
                // metastore
                for (Partition p : partitionsAdded) {
                  client.dropPartition(tableInfo.getDatabaseName(),
    - tableInfo.getTableName(), p.getValues());
    + tableInfo.getTableName(), p.getValues(), true);
                }
              } catch (Exception te) {
                // Keep cause as the original exception
    @@ -990,11 +990,11 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {

        private void cancelDelegationTokens(JobContext context) throws IOException{
          LOG.info("Cancelling delegation token for the job.");
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          try {
            HiveConf hiveConf = HCatUtil
                .getHiveConf(context.getConfiguration());
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            // cancel the deleg. tokens that were acquired for this job now that
            // we are done - we should cancel if the tokens were acquired by
            // HCatOutputFormat and not if they were supplied by Oozie.

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java
    index 1cd5306..001b59b 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java
    @@ -22,9 +22,9 @@ package org.apache.hive.hcatalog.mapreduce;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
      import org.apache.hadoop.hive.metastore.api.MetaException;
      import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
    @@ -113,10 +113,10 @@ class FileOutputFormatContainer extends OutputFormatContainer {
        @Override
        public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
          OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          try {
            HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            handleDuplicatePublish(context,
              jobInfo,
              client,
    @@ -163,7 +163,7 @@ class FileOutputFormatContainer extends OutputFormatContainer {
         * @throws org.apache.thrift.TException
         */
        private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo,
    - HiveMetaStoreClient client, Table table)
    + IMetaStoreClient client, Table table)
            throws IOException, MetaException, TException, NoSuchObjectException {

          /*

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java
    index 6947398..f9e71f0 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java
    @@ -31,8 +31,8 @@ import org.apache.hadoop.fs.permission.FsPermission;
      import org.apache.hadoop.hive.common.classification.InterfaceAudience;
      import org.apache.hadoop.hive.common.classification.InterfaceStability;
      import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
      import org.apache.hadoop.hive.metastore.api.Index;
      import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    @@ -82,12 +82,12 @@ public class HCatOutputFormat extends HCatBaseOutputFormat {
        @SuppressWarnings("unchecked")
        public static void setOutput(Configuration conf, Credentials credentials,
                       OutputJobInfo outputJobInfo) throws IOException {
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;

          try {

            HiveConf hiveConf = HCatUtil.getHiveConf(conf);
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(),
              outputJobInfo.getTableName());


    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java
    index 1980ef5..2f07be1 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java
    @@ -27,8 +27,8 @@ import java.util.Properties;

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.Partition;
      import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
      import org.apache.hadoop.hive.ql.metadata.Table;
    @@ -93,7 +93,7 @@ class InitializeInput {
         */
        private static InputJobInfo getInputJobInfo(
          Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception {
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          HiveConf hiveConf = null;
          try {
            if (conf != null) {
    @@ -101,7 +101,7 @@ class InitializeInput {
            } else {
              hiveConf = new HiveConf(HCatInputFormat.class);
            }
    - client = HCatUtil.getHiveClient(hiveConf);
    + client = HCatUtil.getHiveMetastoreClient(hiveConf);
            Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(),
              inputJobInfo.getTableName());


    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java
    index 39ef86e..9b62195 100644
    --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java
    +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java
    @@ -26,7 +26,7 @@ import java.util.Map;
      import java.util.Map.Entry;

      import org.apache.hadoop.conf.Configuration;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.MetaException;
      import org.apache.hadoop.hive.shims.ShimLoader;
      import org.apache.hadoop.hive.thrift.DelegationTokenSelector;
    @@ -103,7 +103,7 @@ final class Security {
        void handleSecurity(
          Credentials credentials,
          OutputJobInfo outputJobInfo,
    - HiveMetaStoreClient client,
    + IMetaStoreClient client,
          Configuration conf,
          boolean harRequested)
          throws IOException, MetaException, TException, Exception {
    @@ -136,7 +136,7 @@ final class Security {
              // hcat normally in OutputCommitter.commitJob()
              // when the JobTracker in Hadoop MapReduce starts supporting renewal of
              // arbitrary tokens, the renewer should be the principal of the JobTracker
    - hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature);
    + hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName(), ugi.getUserName()), tokenSignature);

              if (harRequested) {
                TokenSelector<? extends TokenIdentifier> jtTokenSelector =
    @@ -165,7 +165,7 @@ final class Security {
        void handleSecurity(
          Job job,
          OutputJobInfo outputJobInfo,
    - HiveMetaStoreClient client,
    + IMetaStoreClient client,
          Configuration conf,
          boolean harRequested)
          throws IOException, MetaException, TException, Exception {
    @@ -175,7 +175,7 @@ final class Security {
        // we should cancel hcat token if it was acquired by hcat
    // and not if it was supplied (i.e. Oozie). In the latter
        // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set
    - void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException {
    + void cancelToken(IMetaStoreClient client, JobContext context) throws IOException, MetaException {
          String tokenStrForm = client.getTokenStrForm();
          if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
            try {

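    Note the signature change buried in the second hunk: IMetaStoreClient#getDelegationToken takes an explicit token owner in addition to the renewer, whereas the old single-argument HiveMetaStoreClient call defaulted the owner; the patch passes the same principal for both. A hedged sketch of the new call (DelegationTokenSketch and fetchToken are made-up names):

    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.security.UserGroupInformation;

    class DelegationTokenSketch {
      // Illustrative: obtain a token the current user both owns and may renew.
      static String fetchToken(IMetaStoreClient client) throws Exception {
        String user = UserGroupInformation.getCurrentUser().getUserName();
        return client.getDelegationToken(user, user); // (owner, renewer)
      }
    }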
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java
    index 63a5548..b2c9c7a 100644
    --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java
    +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java
    @@ -20,7 +20,7 @@ package org.apache.hive.hcatalog.common;

      import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.metastore.HiveMetaStore;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
      import org.apache.hadoop.hive.metastore.api.Database;
      import org.apache.hadoop.hive.metastore.api.FieldSchema;
    @@ -76,29 +76,28 @@ public class TestHiveClientCache {

        @Test
        public void testCacheHit() throws IOException, MetaException, LoginException {
    -
          HiveClientCache cache = new HiveClientCache(1000);
    - HiveMetaStoreClient client = cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client = cache.get(hiveConf);
          assertNotNull(client);
          client.close(); // close shouldn't matter

        // Setting a non-important configuration value should still return the same client
          hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10);
    - HiveMetaStoreClient client2 = cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client2 = cache.get(hiveConf);
          assertNotNull(client2);
    - assertEquals(client, client2);
    + assertEquals(client.getUsers(), client2.getUsers());
          client2.close();
        }

        @Test
        public void testCacheMiss() throws IOException, MetaException, LoginException {
          HiveClientCache cache = new HiveClientCache(1000);
    - HiveMetaStoreClient client = cache.get(hiveConf);
    + IMetaStoreClient client = cache.get(hiveConf);
          assertNotNull(client);

        // Set a different URI, since it is one of the criteria that decide whether to return the same client
          hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different
    - HiveMetaStoreClient client2 = cache.get(hiveConf);
    + IMetaStoreClient client2 = cache.get(hiveConf);
          assertNotNull(client2);
          assertNotSame(client, client2);
        }
    @@ -110,11 +109,11 @@ public class TestHiveClientCache {
        @Test
        public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException {
          HiveClientCache cache = new HiveClientCache(1);
    - HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client = cache.get(hiveConf);
          assertNotNull(client);

          Thread.sleep(2500);
    - HiveMetaStoreClient client2 = cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client2 = cache.get(hiveConf);
          client.close();
        assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have torn down the client

    @@ -132,21 +131,21 @@ public class TestHiveClientCache {
        public void testMultipleThreadAccess() throws ExecutionException, InterruptedException {
          final HiveClientCache cache = new HiveClientCache(1000);

    - class GetHiveClient implements Callable<HiveMetaStoreClient> {
    + class GetHiveClient implements Callable<IMetaStoreClient> {
            @Override
    - public HiveMetaStoreClient call() throws IOException, MetaException, LoginException {
    + public IMetaStoreClient call() throws IOException, MetaException, LoginException {
              return cache.get(hiveConf);
            }
          }

          ExecutorService executor = Executors.newFixedThreadPool(2);

    - Callable<HiveMetaStoreClient> worker1 = new GetHiveClient();
    - Callable<HiveMetaStoreClient> worker2 = new GetHiveClient();
    - Future<HiveMetaStoreClient> clientFuture1 = executor.submit(worker1);
    - Future<HiveMetaStoreClient> clientFuture2 = executor.submit(worker2);
    - HiveMetaStoreClient client1 = clientFuture1.get();
    - HiveMetaStoreClient client2 = clientFuture2.get();
    + Callable<IMetaStoreClient> worker1 = new GetHiveClient();
    + Callable<IMetaStoreClient> worker2 = new GetHiveClient();
    + Future<IMetaStoreClient> clientFuture1 = executor.submit(worker1);
    + Future<IMetaStoreClient> clientFuture2 = executor.submit(worker2);
    + IMetaStoreClient client1 = clientFuture1.get();
    + IMetaStoreClient client2 = clientFuture2.get();
          assertNotNull(client1);
          assertNotNull(client2);
          assertNotSame(client1, client2);
    @@ -155,9 +154,9 @@ public class TestHiveClientCache {
        @Test
        public void testCloseAllClients() throws IOException, MetaException, LoginException {
          final HiveClientCache cache = new HiveClientCache(1000);
    - HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client1 = cache.get(hiveConf);
          hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different
    - HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf);
    + HiveClientCache.ICacheableMetaStoreClient client2 = cache.get(hiveConf);
          cache.closeAllClientsQuietly();
          assertTrue(client1.isClosed());
          assertTrue(client2.isClosed());

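    These tests now exercise the cache purely through HiveClientCache.ICacheableMetaStoreClient instead of casting to the concrete CacheableHiveMetaStoreClient. The semantics they pin down, in sketch form (CacheSemanticsSketch and demo are made-up names; expiry is in seconds, and METASTOREURIS is compared as a raw string, so even whitespace changes force a miss):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hive.hcatalog.common.HiveClientCache;

    class CacheSemanticsSketch {
      static void demo() throws Exception {
        HiveConf conf = new HiveConf();
        HiveClientCache cache = new HiveClientCache(1000); // expiry in seconds
        IMetaStoreClient a = cache.get(conf);
        IMetaStoreClient b = cache.get(conf);              // same conf key: hit
        conf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs compared as strings
        IMetaStoreClient c = cache.get(conf);              // different key: miss
      }
    }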
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
    index c98d947..f437079 100644
    --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
    +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
    @@ -147,7 +147,7 @@ public abstract class HCatMapReduceTest extends HCatBaseTest {
          // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time
      // Otherwise the cache doesn't play well with the second test method, since the client gets closed() in the
          // tearDown() of the previous test
    - HCatUtil.getHiveClient(hiveConf);
    + HCatUtil.getHiveMetastoreClient(hiveConf);

          MapCreate.writeCount = 0;
          MapRead.readCount = 0;

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java
    index f8a0af1..735ab5f 100644
    --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java
    +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java
    @@ -109,7 +109,7 @@ public class TestPassProperties {
            new FileOutputCommitterContainer(job, null).cleanupJob(job);
          } catch (Exception e) {
            caughtException = true;
    - assertTrue(e.getMessage().contains(
    + assertTrue(e.getCause().getMessage().contains(
                "Could not connect to meta store using any of the URIs provided"));
          }
          assertTrue(caughtException);

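    The connection failure is now surfaced as the cause of a wrapping exception rather than as the thrown exception itself, hence the move to e.getCause().getMessage(). A slightly more defensive variant (a sketch, not what the test does; CauseChainSketch and mentionsMetastoreFailure are made-up names) walks the whole cause chain:

    class CauseChainSketch {
      // Walk the full cause chain looking for the metastore failure message.
      static boolean mentionsMetastoreFailure(Throwable t) {
        for (Throwable cur = t; cur != null; cur = cur.getCause()) {
          String msg = cur.getMessage();
          if (msg != null && msg.contains(
              "Could not connect to meta store using any of the URIs provided")) {
            return true;
          }
        }
        return false;
      }
    }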
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
    index 48a40b1..337f4fb 100644
    --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
    +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
    @@ -24,7 +24,6 @@ import java.sql.Date;
      import java.sql.Timestamp;
      import java.util.ArrayList;
      import java.util.Arrays;
    -import java.util.Calendar;
      import java.util.HashMap;
      import java.util.List;
      import java.util.Map;
    @@ -36,7 +35,7 @@ import org.apache.hadoop.hive.common.type.HiveChar;
      import org.apache.hadoop.hive.common.type.HiveDecimal;
      import org.apache.hadoop.hive.common.type.HiveVarchar;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
      import org.apache.hadoop.hive.ql.metadata.Table;
    @@ -63,7 +62,6 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
      import org.apache.pig.impl.util.UDFContext;
      import org.apache.pig.impl.util.Utils;
      import org.joda.time.DateTime;
    -import org.joda.time.DateTimeZone;
      import org.slf4j.Logger;
      import org.slf4j.LoggerFactory;

    @@ -141,7 +139,7 @@ class PigHCatUtil {
          return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL);
        }

    - private static HiveMetaStoreClient getHiveMetaClient(String serverUri,
    + private static IMetaStoreClient getHiveMetaClient(String serverUri,
                                   String serverKerberosPrincipal,
                                   Class<?> clazz,
                                   Job job) throws Exception {
    @@ -163,7 +161,7 @@ class PigHCatUtil {
          }

          try {
    - return HCatUtil.getHiveClient(hiveConf);
    + return HCatUtil.getHiveMetastoreClient(hiveConf);
          } catch (Exception e) {
            throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e);
          }
    @@ -203,7 +201,7 @@ class PigHCatUtil {
          String dbName = dbTablePair.first;
          String tableName = dbTablePair.second;
          Table table = null;
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
          try {
            client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class, job);
            table = HCatUtil.getTable(client, dbName, tableName);

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
    index 1c85ab5..ed46bca 100644
    --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
    +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/AbstractRecordWriter.java
    @@ -24,7 +24,7 @@ import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.hive.common.JavaUtils;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.MetaException;
      import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
      import org.apache.hadoop.hive.metastore.api.Table;
    @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.io.RecordUpdater;
      import org.apache.hadoop.hive.serde2.SerDe;
      import org.apache.hadoop.hive.serde2.SerDeException;
      import org.apache.hadoop.util.ReflectionUtils;
    +import org.apache.hive.hcatalog.common.HCatUtil;
      import org.apache.thrift.TException;

      import java.io.IOException;
    @@ -46,7 +47,7 @@ abstract class AbstractRecordWriter implements RecordWriter {
        final HiveEndPoint endPoint;
        final Table tbl;

    - final HiveMetaStoreClient msClient;
    + final IMetaStoreClient msClient;
        RecordUpdater updater = null;

        private final int totalBuckets;
    @@ -62,7 +63,7 @@ abstract class AbstractRecordWriter implements RecordWriter {
          this.conf = conf!=null ? conf
                      : HiveEndPoint.createHiveConf(DelimitedInputWriter.class, endPoint.metaStoreUri);
          try {
    - msClient = new HiveMetaStoreClient(this.conf);
    + msClient = HCatUtil.getHiveMetastoreClient(this.conf);
            this.tbl = msClient.getTable(endPoint.database, endPoint.table);
            this.partitionPath = getPathForEndPoint(msClient, endPoint);
            this.totalBuckets = tbl.getSd().getNumBuckets();
    @@ -80,6 +81,8 @@ abstract class AbstractRecordWriter implements RecordWriter {
            throw new StreamingException(e.getMessage(), e);
          } catch (ClassNotFoundException e) {
            throw new StreamingException(e.getMessage(), e);
    + } catch (IOException e) {
    + throw new StreamingException(e.getMessage(), e);
          }
        }

    @@ -147,7 +150,7 @@ abstract class AbstractRecordWriter implements RecordWriter {
          }
        }

    - private Path getPathForEndPoint(HiveMetaStoreClient msClient, HiveEndPoint endPoint)
    + private Path getPathForEndPoint(IMetaStoreClient msClient, HiveEndPoint endPoint)
                throws StreamingException {
          try {
            String location;

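    The added catch block is required because HCatUtil.getHiveMetastoreClient() declares IOException, which the old `new HiveMetaStoreClient(conf)` constructor did not. Since both failure modes are wrapped identically, a Java 7 multi-catch is an equivalent, more compact formulation; a reduced sketch under that assumption (ConnectSketch and connect are made-up names, and StreamingException's (message, cause) constructor is used exactly as in the hunks above):

    import java.io.IOException;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hive.hcatalog.common.HCatUtil;
    import org.apache.hive.hcatalog.streaming.StreamingException;

    class ConnectSketch {
      // The factory declares IOException in addition to MetaException, so both
      // must be handled; multi-catch keeps the uniform wrap-and-rethrow in one block.
      static IMetaStoreClient connect(HiveConf conf) throws StreamingException {
        try {
          return HCatUtil.getHiveMetastoreClient(conf);
        } catch (MetaException | IOException e) {
          throw new StreamingException(e.getMessage(), e);
        }
      }
    }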
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
    index a08f2f9..3c25486 100644
    --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
    +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
    @@ -22,7 +22,6 @@ import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.cli.CliSessionState;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.LockComponentBuilder;
      import org.apache.hadoop.hive.metastore.LockRequestBuilder;
    @@ -40,6 +39,7 @@ import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
      import org.apache.hadoop.hive.ql.CommandNeedRetryException;
      import org.apache.hadoop.hive.ql.Driver;
      import org.apache.hadoop.hive.ql.session.SessionState;
    +import org.apache.hive.hcatalog.common.HCatUtil;

      import org.apache.hadoop.security.UserGroupInformation;
      import org.apache.thrift.TException;
    @@ -445,10 +445,13 @@ public class HiveEndPoint {
              conf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL,true);
            }
            try {
    - return new HiveMetaStoreClient(conf);
    + return HCatUtil.getHiveMetastoreClient(conf);
            } catch (MetaException e) {
              throw new ConnectionError("Error connecting to Hive Metastore URI: "
    - + endPoint.metaStoreUri, e);
    + + endPoint.metaStoreUri + ". " + e.getMessage(), e);
    + } catch (IOException e) {
    + throw new ConnectionError("Error connecting to Hive Metastore URI: "
    + + endPoint.metaStoreUri + ". " + e.getMessage(), e);
            }
          }


    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
    index 3b2cd38..3a69581 100644
    --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
    +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java
    @@ -34,7 +34,6 @@ import org.apache.hadoop.hive.common.ObjectPair;
      import org.apache.hadoop.hive.common.classification.InterfaceAudience;
      import org.apache.hadoop.hive.common.classification.InterfaceStability;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
      import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.MetaStoreUtils;
      import org.apache.hadoop.hive.metastore.TableType;
    @@ -85,7 +84,7 @@ import javax.annotation.Nullable;
      public class HCatClientHMSImpl extends HCatClient {

        private static final Logger LOG = LoggerFactory.getLogger(HCatClientHMSImpl.class);
    - private HiveMetaStoreClient hmsClient;
    + private IMetaStoreClient hmsClient;
        private Configuration config;
        private HiveConf hiveConfig;

    @@ -96,7 +95,9 @@ public class HCatClientHMSImpl extends HCatClient {
          try {
            dbNames = hmsClient.getDatabases(pattern);
          } catch (MetaException exp) {
    - throw new HCatException("MetaException while listing db names", exp);
    + throw new HCatException("MetaException while listing db names. " + exp.getMessage(), exp);
    + } catch (TException e) {
    + throw new HCatException("Transport Exception while listing db names. " + e.getMessage(), e);
          }
          return dbNames;
        }
    @@ -172,8 +173,12 @@ public class HCatClientHMSImpl extends HCatClient {
          try {
            tableNames = hmsClient.getTables(checkDB(dbName), tablePattern);
          } catch (MetaException e) {
    - throw new HCatException(
    - "MetaException while fetching table names.", e);
    + throw new HCatException("MetaException while fetching table names. " + e.getMessage(), e);
    + } catch (UnknownDBException e) {
    + throw new HCatException("UnknownDB " + dbName + " while fetching table names.", e);
    + } catch (TException e) {
    + throw new HCatException("Transport exception while fetching table names. "
    + + e.getMessage(), e);
          }
          return tableNames;
        }
    @@ -815,7 +820,7 @@ public class HCatClientHMSImpl extends HCatClient {
          this.config = conf;
          try {
            hiveConfig = HCatUtil.getHiveConf(config);
    - hmsClient = HCatUtil.getHiveClient(hiveConfig);
    + hmsClient = HCatUtil.getHiveMetastoreClient(hiveConfig);
          } catch (MetaException exp) {
            throw new HCatException("MetaException while creating HMS client",
              exp);

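    One subtlety in the broadened handling: the Thrift-generated MetaException and UnknownDBException both extend TException, so the specific catches must precede the generic TException catch or the code will not compile. The patch also appends e.getMessage() so the wrapped HCatException carries the underlying reason. A fragment mirroring the getTables hunk above (variable names taken from that hunk):

      try {
        tableNames = hmsClient.getTables(dbName, tablePattern);
      } catch (MetaException e) {   // specific Thrift subclass first
        throw new HCatException("MetaException while fetching table names. " + e.getMessage(), e);
      } catch (TException e) {      // generic Thrift supertype last
        throw new HCatException("Transport exception while fetching table names. " + e.getMessage(), e);
      }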
    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java
    index 1b9663d..e3be5b7 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java
    @@ -26,7 +26,7 @@ import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
      import org.apache.hadoop.hive.common.classification.InterfaceAudience;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hive.hcatalog.common.HCatUtil;
      import org.apache.hive.hcatalog.templeton.tool.DelegationTokenCache;
      import org.apache.hive.hcatalog.templeton.tool.JobState;
    @@ -94,13 +94,13 @@ public class CompleteDelegator extends TempletonDelegator {
            return new CompleteBean("Callback sent");
          } finally {
            state.close();
    - HiveMetaStoreClient client = null;
    + IMetaStoreClient client = null;
            try {
              if(cancelMetastoreToken) {
                String metastoreTokenStrForm =
                        DelegationTokenCache.getStringFormTokenCache().getDelegationToken(id);
                if(metastoreTokenStrForm != null) {
    - client = HCatUtil.getHiveClient(new HiveConf());
    + client = HCatUtil.getHiveMetastoreClient(new HiveConf());
                  client.cancelDelegationToken(metastoreTokenStrForm);
                  LOG.debug("Cancelled token for jobId=" + id + " status from JT=" + jobStatus);
                  DelegationTokenCache.getStringFormTokenCache().removeDelegationToken(id);

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
    index 8ae61a1..b4687b5 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
    @@ -30,12 +30,13 @@ import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.hive.metastore.api.MetaException;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.security.Credentials;
      import org.apache.hadoop.security.UserGroupInformation;
      import org.apache.hadoop.security.token.Token;
    +import org.apache.hive.hcatalog.common.HCatUtil;
      import org.apache.thrift.TException;

      /**
    @@ -175,8 +176,8 @@ public class SecureProxySupport {

        private String buildHcatDelegationToken(String user)
          throws IOException, InterruptedException, MetaException, TException {
    - HiveConf c = new HiveConf();
    - final HiveMetaStoreClient client = new HiveMetaStoreClient(c);
    + final HiveConf c = new HiveConf();
    + final IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(c);
          LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
          final TokenWrapper twrapper = new TokenWrapper();
          final UserGroupInformation ugi = UgiFactory.getUgi(user);
    @@ -184,7 +185,7 @@ public class SecureProxySupport {
            public String run()
              throws IOException, MetaException, TException {
              String u = ugi.getUserName();
    - return client.getDelegationToken(u);
    + return client.getDelegationToken(c.getUser(), u);
            }
          });
          return s;

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    ----------------------------------------------------------------------
    diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    index 1f8ebf3..349bd5c 100644
    --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java
    @@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.conf.Configured;
      import org.apache.hadoop.hive.common.classification.InterfaceAudience;
      import org.apache.hadoop.hive.conf.HiveConf;
    -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    +import org.apache.hadoop.hive.metastore.IMetaStoreClient;
      import org.apache.hadoop.io.NullWritable;
      import org.apache.hadoop.io.Text;
      import org.apache.hadoop.mapred.JobClient;
    @@ -40,6 +40,7 @@ import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIden
      import org.apache.hadoop.security.UserGroupInformation;
      import org.apache.hadoop.security.token.Token;
      import org.apache.hadoop.util.Tool;
    +import org.apache.hive.hcatalog.common.HCatUtil;
      import org.apache.hive.hcatalog.templeton.AppConfig;
      import org.apache.hive.hcatalog.templeton.SecureProxySupport;
      import org.apache.hive.hcatalog.templeton.UgiFactory;
    @@ -176,12 +177,12 @@ public class TempletonControllerJob extends Configured implements Tool, JobSubmi
          return real.doAs(new PrivilegedExceptionAction<String>() {
            @Override
            public String run() throws IOException, TException, InterruptedException {
    - final HiveMetaStoreClient client = new HiveMetaStoreClient(c);
    + final IMetaStoreClient client = HCatUtil.getHiveMetastoreClient(c);
              return ugi.doAs(new PrivilegedExceptionAction<String>() {
                @Override
                public String run() throws IOException, TException, InterruptedException {
                  String u = ugi.getUserName();
    - return client.getDelegationToken(u);
    + return client.getDelegationToken(c.getUser(),u);
                }
              });
            }

    http://git-wip-us.apache.org/repos/asf/hive/blob/45307c10/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    ----------------------------------------------------------------------
    diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    index 129a98d..341b0ca 100644
    --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.metastore.api.TxnOpenException;
      import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
      import org.apache.thrift.TException;

    +import java.io.IOException;
      import java.util.List;
      import java.util.Map;

    @@ -1115,6 +1116,8 @@ public interface IMetaStoreClient {
         */
        void cancelDelegationToken(String tokenStrForm) throws MetaException, TException;

    + public String getTokenStrForm() throws IOException;
    +
        void createFunction(Function func)
            throws InvalidObjectException, MetaException, TException;
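
    Promoting getTokenStrForm() onto the interface is what lets callers such as Security.cancelToken() above hold an IMetaStoreClient rather than the concrete client. A condensed sketch of that cancellation flow (TokenCancelSketch and cancelIfPresent are made-up names):

    import org.apache.hadoop.hive.metastore.IMetaStoreClient;

    class TokenCancelSketch {
      // Condensed version of Security.cancelToken() above, now expressible
      // against the interface alone.
      static void cancelIfPresent(IMetaStoreClient client) throws Exception {
        String tokenStrForm = client.getTokenStrForm(); // newly on the interface
        if (tokenStrForm != null) {
          client.cancelDelegationToken(tokenStrForm);
        }
      }
    }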
  • Prasanthj at May 7, 2015 at 1:21 am
    HIVE-10061: HiveConf Should not be used as part of the HS2 client side code (Hari Sankar Sivarama Subramaniyan reviewed by Vaibhav Gumashta)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c42666ed
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c42666ed
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c42666ed

    Branch: refs/heads/llap
    Commit: c42666ed5cfa2f1ff83be443aea21e8be8041b95
    Parents: fe01db2
    Author: Vaibhav Gumashta <vgumashta@apache.org>
    Authored: Mon May 4 18:12:21 2015 -0700
    Committer: Vaibhav Gumashta <vgumashta@apache.org>
    Committed: Mon May 4 18:12:21 2015 -0700

    ----------------------------------------------------------------------
      jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java | 3 +--
      .../hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java | 5 +++++
      .../org/apache/hive/service/cli/session/TestSessionHooks.java | 3 +--
      3 files changed, 7 insertions(+), 4 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/c42666ed/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
    ----------------------------------------------------------------------
    diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
    index 2b4be7f..1b2891b 100644
    --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
    +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
    @@ -58,7 +58,6 @@ import org.apache.http.impl.client.CloseableHttpClient;
      import org.apache.http.protocol.HttpContext;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    -import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hive.jdbc.Utils.JdbcConnectionParams;
      import org.apache.hive.service.auth.HiveAuthFactory;
      import org.apache.hive.service.auth.KerberosSaslHelper;
    @@ -158,7 +157,7 @@ public class HiveConnection implements java.sql.Connection {

          if (isEmbeddedMode) {
            EmbeddedThriftBinaryCLIService embeddedClient = new EmbeddedThriftBinaryCLIService();
    - embeddedClient.init(new HiveConf());
    + embeddedClient.init(null);
            client = embeddedClient;
          } else {
            // extract user/password from JDBC connection properties if its not supplied in the

    http://git-wip-us.apache.org/repos/asf/hive/blob/c42666ed/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    ----------------------------------------------------------------------
    diff --git a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    index ac63537..a57fc8f 100644
    --- a/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    +++ b/service/src/java/org/apache/hive/service/cli/thrift/EmbeddedThriftBinaryCLIService.java
    @@ -37,6 +37,11 @@ public class EmbeddedThriftBinaryCLIService extends ThriftBinaryCLIService {

        @Override
        public synchronized void init(HiveConf hiveConf) {
    + // A null HiveConf is passed in from the JDBC driver side, since the driver side is
    + // supposed to be independent of the HiveConf object. Create a new HiveConf here in that case.
    + if (hiveConf == null) {
    + hiveConf = new HiveConf();
    + }
          cliService.init(hiveConf);
          cliService.start();
          super.init(hiveConf);

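    The upshot for HIVE-10061 is that embedded-mode JDBC no longer has to construct a HiveConf on the driver side; it passes null and the service builds its own. The resulting bootstrap, in sketch form (this mirrors the HiveConnection change above):

      EmbeddedThriftBinaryCLIService embeddedClient = new EmbeddedThriftBinaryCLIService();
      embeddedClient.init(null); // null is legal: the service creates its own HiveConf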
    http://git-wip-us.apache.org/repos/asf/hive/blob/c42666ed/service/src/test/org/apache/hive/service/cli/session/TestSessionHooks.java
    ----------------------------------------------------------------------
    diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionHooks.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionHooks.java
    index 746987d..aaf7388 100644
    --- a/service/src/test/org/apache/hive/service/cli/session/TestSessionHooks.java
    +++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionHooks.java
    @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger;
      import junit.framework.Assert;
      import junit.framework.TestCase;

    -import org.apache.hadoop.hive.conf.HiveConf;
      import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
      import org.apache.hive.service.auth.HiveAuthFactory;
      import org.apache.hive.service.cli.HiveSQLException;
    @@ -64,7 +63,7 @@ public class TestSessionHooks extends TestCase {
          System.setProperty(ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
              TestSessionHooks.SessionHookTest.class.getName());
          service = new EmbeddedThriftBinaryCLIService();
    - service.init(new HiveConf());
    + service.init(null);
          client = new ThriftCLIServiceClient(service);
        }
  • Prasanthj at May 7, 2015 at 1:21 am
    HIVE-7375 : Add option in test infra to compile in other profiles (like hadoop-1) (Szehon, reviewed by Xuefu and Brock)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/92d0b81e
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/92d0b81e
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/92d0b81e

    Branch: refs/heads/llap
    Commit: 92d0b81e99b4889d9a831244ab019ec84fc84408
    Parents: bb3a665
    Author: Szehon Ho <szehon@cloudera.com>
    Authored: Tue May 5 12:12:39 2015 -0700
    Committer: Szehon Ho <szehon@cloudera.com>
    Committed: Tue May 5 12:12:39 2015 -0700

    ----------------------------------------------------------------------
      .../ptest2/src/main/resources/source-prep.vm | 11 ++
      .../hive/ptest/execution/TestScripts.java | 23 ++++
      .../TestScripts.testPrepGit.approved.txt | 2 +-
      .../TestScripts.testPrepHadoop1.approved.txt | 111 +++++++++++++++++++
      .../TestScripts.testPrepNone.approved.txt | 2 +-
      .../TestScripts.testPrepSvn.approved.txt | 2 +-
      6 files changed, 148 insertions(+), 3 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/main/resources/source-prep.vm
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/main/resources/source-prep.vm b/testutils/ptest2/src/main/resources/source-prep.vm
    index babc245..97fb69c 100644
    --- a/testutils/ptest2/src/main/resources/source-prep.vm
    +++ b/testutils/ptest2/src/main/resources/source-prep.vm
    @@ -85,6 +85,17 @@ cd $workingDir/
        if [[ "${buildTool}" == "maven" ]]
        then
          rm -rf $workingDir/maven/org/apache/hive
    + #if($additionalProfiles)
    + echo "Compile check on additional Profiles: $additionalProfiles"
    + export ADDITIONAL_PROFILES=$additionalProfiles
    + for i in $(echo $ADDITIONAL_PROFILES | tr "," "\n")
    + do
    + mvn clean install -DskipTests -P$i;
    + cd itests
    + mvn clean install -DskipTests -P$i;
    + cd ..
    + done
    + #end
          mvn -B clean install -DskipTests -Dmaven.repo.local=$workingDir/maven $mavenArgs $mavenBuildArgs
          mvn -B test -Dmaven.repo.local=$workingDir/maven -Dtest=TestDummy $mavenArgs $mavenTestArgs
          cd itests

    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.java
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.java b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.java
    index 3e543a7..b55e979 100644
    --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.java
    +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.java
    @@ -151,6 +151,29 @@ public class TestScripts {
          String actual = getTemplateResult(template, templateVariables);
          Approvals.verify(actual);
        }
    +
    + @Test
    + public void testPrepHadoop1() throws Throwable {
    + Map<String, String> templateVariables = Maps.newHashMap();
    + templateVariables.put("repository", "https://svn.apache.org/repos/asf/hive/trunk");
    + templateVariables.put("repositoryName", "apache");
    + templateVariables.put("branch", "");
    + templateVariables.put("localDir", "/some/local/dir");
    + templateVariables.put("workingDir", "/some/working/dir");
    + templateVariables.put("buildTool", "maven");
    + templateVariables.put("antArgs", "-Dant=arg1");
    + templateVariables.put("buildTag", "build-1");
    + templateVariables.put("logDir", "/some/log/dir");
    + templateVariables.put("testArguments", "-Dtest=arg1");
    + templateVariables.put("clearLibraryCache", "true");
    + templateVariables.put("javaHome", "/usr/java/jdk1.7");
    + templateVariables.put("antEnvOpts", "-Dhttp.proxyHost=somehost -Dhttp.proxyPort=3128");
    + templateVariables.put("repositoryType", "svn");
    + templateVariables.put("additionalProfiles", "hadoop-1");
    + String template = readResource("source-prep.vm");
    + String actual = getTemplateResult(template, templateVariables);
    + Approvals.verify(actual);
    + }
        @Test
        public void testPrepSvn() throws Throwable {
          Map<String, String> templateVariables = Maps.newHashMap();

    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
    index 8dce26f..673614b 100644
    --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
    +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepGit.approved.txt
    @@ -84,7 +84,7 @@ cd /some/working/dir/
        if [[ "${buildTool}" == "maven" ]]
        then
          rm -rf /some/working/dir/maven/org/apache/hive
    - mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
    + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2
          mvn -B test -Dmaven.repo.local=/some/working/dir/maven -Dtest=TestDummy -X -Phadoop-1
          cd itests
          mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven -X -Phadoop-2

    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
    new file mode 100644
    index 0000000..dbb6a6d
    --- /dev/null
    +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepHadoop1.approved.txt
    @@ -0,0 +1,111 @@
    +# Licensed to the Apache Software Foundation (ASF) under one or more
    +# contributor license agreements. See the NOTICE file distributed with
    +# this work for additional information regarding copyright ownership.
    +# The ASF licenses this file to You under the Apache License, Version 2.0
    +# (the "License"); you may not use this file except in compliance with
    +# the License. You may obtain a copy of the License at
    +#
    +# http://www.apache.org/licenses/LICENSE-2.0
    +#
    +# Unless required by applicable law or agreed to in writing, software
    +# distributed under the License is distributed on an "AS IS" BASIS,
    +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +# See the License for the specific language governing permissions and
    +# limitations under the License.
    +
    +set -e
    +set -x
    +if [[ -n "/usr/java/jdk1.7" ]]
    +then
    + export JAVA_HOME=/usr/java/jdk1.7
    + export PATH=$JAVA_HOME/bin/:$PATH
    +fi
    +export ANT_OPTS="-Xmx1g -XX:MaxPermSize=256m -Dhttp.proxyHost=somehost -Dhttp.proxyPort=3128"
    +export M2_OPTS="-Xmx1g -XX:MaxPermSize=256m ${mavenEnvOpts}"
    +cd /some/working/dir/
    +(
    + if [[ "true" == "true" ]]
    + then
    + rm -rf ivy maven
    + fi
    + mkdir -p maven ivy
    + if [[ "svn" = "svn" ]]
    + then
    + if [[ -n "" ]]
    + then
    + echo "Illegal argument for svn: branch ''."
    + exit 1
    + fi
    + if [[ -d apache-source ]] && [[ ! -d apache-source/.svn ]]
    + then
    + rm -rf apache-source
    + fi
    + if [[ ! -d apache-source ]]
    + then
    + svn co https://svn.apache.org/repos/asf/hive/trunk apache-source
    + fi
    + cd apache-source
    + svn revert -R .
    + rm -rf $(svn status --no-ignore | egrep -v '^X|^Performing status on external' | awk '{print $2}')
    + svn update
    + elif [[ "svn" = "git" ]]
    + then
    + if [[ -z "" ]]
    + then
    + echo "Illegal argument for git: branch name is required."
    + exit 1
    + fi
    + if [[ -d apache-source ]] && [[ ! -d apache-source/.git ]]
    + then
    + rm -rf apache-source
    + fi
    + if [[ ! -d apache-source ]]
    + then
    + git clone https://svn.apache.org/repos/asf/hive/trunk apache-source
    + fi
    + cd apache-source
    + git fetch origin
    + git reset --hard HEAD && git clean -f -d
    + git checkout || git checkout -b origin/
    + git reset --hard origin/
    + git merge --ff-only origin/
    + git gc
    + else
    + echo "Unknown repository type 'svn'"
    + exit 1
    + fi
    + patchCommandPath=/some/working/dir/scratch/smart-apply-patch.sh
    + patchFilePath=/some/working/dir/scratch/build.patch
    + if [[ -f $patchFilePath ]]
    + then
    + chmod +x $patchCommandPath
    + $patchCommandPath $patchFilePath
    + fi
    + if [[ "maven" == "maven" ]]
    + then
    + rm -rf /some/working/dir/maven/org/apache/hive
    + echo "Compile check on additional Profiles: hadoop-1"
    + export ADDITIONAL_PROFILES=hadoop-1
    + for i in $(echo $ADDITIONAL_PROFILES | tr "," "\n")
    + do
    + mvn clean install -DskipTests -P$i;
    + cd itests
    + mvn clean install -DskipTests -P$i;
    + cd ..
    + done
    + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
    + mvn -B test -Dmaven.repo.local=/some/working/dir/maven -Dtest=TestDummy $mavenArgs $mavenTestArgs
    + cd itests
    + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
    + mvn -B test -Dmaven.repo.local=/some/working/dir/maven -Dtest=TestDummy $mavenArgs $mavenTestArgs
    + elif [[ "maven" == "ant" ]]
    + then
    + ant -Dant=arg1 -Divy.default.ivy.user.dir=/some/working/dir/ivy \
    + -Dmvn.local.repo=/some/working/dir/maven clean package test \
    + -Dtestcase=nothing
    + else
    + echo "Unknown build tool maven"
    + exit 127
    + fi
    +) 2>&1 | tee /some/log/dir/source-prep.txt
    +exit ${PIPESTATUS[0]}

    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepNone.approved.txt
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepNone.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepNone.approved.txt
    index 98afe93..c05dae9 100644
    --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepNone.approved.txt
    +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepNone.approved.txt
    @@ -84,7 +84,7 @@ cd /some/working/dir/
        if [[ "ant" == "maven" ]]
        then
          rm -rf /some/working/dir/maven/org/apache/hive
    - mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
    + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
          mvn -B test -Dmaven.repo.local=/some/working/dir/maven -Dtest=TestDummy $mavenArgs $mavenTestArgs
          cd itests
          mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs

    http://git-wip-us.apache.org/repos/asf/hive/blob/92d0b81e/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepSvn.approved.txt
    ----------------------------------------------------------------------
    diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepSvn.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepSvn.approved.txt
    index d267c5f..c8a4b6e 100644
    --- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepSvn.approved.txt
    +++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestScripts.testPrepSvn.approved.txt
    @@ -84,7 +84,7 @@ cd /some/working/dir/
        if [[ "maven" == "maven" ]]
        then
          rm -rf /some/working/dir/maven/org/apache/hive
    - mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
    + mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
          mvn -B test -Dmaven.repo.local=/some/working/dir/maven -Dtest=TestDummy $mavenArgs $mavenTestArgs
          cd itests
          mvn -B clean install -DskipTests -Dmaven.repo.local=/some/working/dir/maven $mavenArgs $mavenBuildArgs
  • Prasanthj at May 7, 2015 at 1:21 am
    Merge from trunk to llap branch - 05/06/2015 (Prasanth Jayachandran)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b8acbb6e
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b8acbb6e
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b8acbb6e

    Branch: refs/heads/llap
    Commit: b8acbb6ef7b97502b772569641917e6ef973b25e
    Parents: d755140 632a309
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Wed May 6 18:19:55 2015 -0700
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Wed May 6 18:19:55 2015 -0700

    ----------------------------------------------------------------------
      .reviewboardrc | 2 +-
      accumulo-handler/pom.xml | 2 +-
      .../src/test/templates/TestAccumuloCliDriver.vm | 19 +-
      ant/pom.xml | 2 +-
      .../apache/hadoop/hive/ant/GenVectorCode.java | 2 +
      .../apache/hadoop/hive/ant/QTestGenTask.java | 25 +
      beeline/pom.xml | 2 +-
      .../java/org/apache/hive/beeline/BeeLine.java | 10 +-
      bin/beeline.cmd | 11 +-
      bin/ext/hiveserver2.cmd | 2 +-
      bin/hive | 9 +-
      cli/pom.xml | 2 +-
      .../org/apache/hadoop/hive/cli/CliDriver.java | 2 -
      common/pom.xml | 2 +-
      .../apache/hadoop/hive/common/FileUtils.java | 155 +-
      .../apache/hadoop/hive/common/JavaUtils.java | 8 +
      .../hadoop/hive/common/StatsSetupConst.java | 5 +
      .../hive/common/jsonexplain/tez/Vertex.java | 2 +-
      .../hadoop/hive/common/type/HiveDecimal.java | 19 +-
      .../org/apache/hadoop/hive/conf/HiveConf.java | 144 +-
      .../apache/hive/common/util/BloomFilter.java | 291 +
      .../org/apache/hive/common/util/Murmur3.java | 334 +
      .../hive/common/util/TestBloomFilter.java | 458 ++
      .../apache/hive/common/util/TestMurmur3.java | 189 +
      contrib/pom.xml | 2 +-
      dev-support/jenkins-execute-build.sh | 6 +-
      dev-support/jenkins-execute-hms-test.sh | 20 +-
      hbase-handler/pom.xml | 2 +-
      .../hadoop/hive/hbase/HBaseStorageHandler.java | 8 +
      .../src/test/queries/positive/hbase_timestamp.q | 22 +-
      .../test/results/positive/hbase_timestamp.q.out | 60 +-
      .../src/test/templates/TestHBaseCliDriver.vm | 18 +-
      .../templates/TestHBaseNegativeCliDriver.vm | 19 +-
      hcatalog/core/pom.xml | 2 +-
      .../apache/hive/hcatalog/common/HCatUtil.java | 37 +-
      .../hive/hcatalog/common/HiveClientCache.java | 85 +-
      .../hive/hcatalog/data/HCatRecordSerDe.java | 9 +-
      .../apache/hive/hcatalog/data/JsonSerDe.java | 7 +-
      .../DefaultOutputCommitterContainer.java | 6 +-
      .../mapreduce/FileOutputCommitterContainer.java | 22 +-
      .../mapreduce/FileOutputFormatContainer.java | 8 +-
      .../hcatalog/mapreduce/HCatBaseInputFormat.java | 50 +-
      .../hcatalog/mapreduce/HCatOutputFormat.java | 6 +-
      .../hcatalog/mapreduce/HCatRecordReader.java | 4 +-
      .../hive/hcatalog/mapreduce/HCatSplit.java | 21 +-
      .../hive/hcatalog/mapreduce/HCatTableInfo.java | 12 +
      .../hcatalog/mapreduce/InitializeInput.java | 6 +-
      .../hive/hcatalog/mapreduce/InputJobInfo.java | 5 +
      .../hive/hcatalog/mapreduce/InternalUtil.java | 2 +
      .../hive/hcatalog/mapreduce/PartInfo.java | 117 +-
      .../hive/hcatalog/mapreduce/Security.java | 10 +-
      .../mapreduce/TaskCommitContextRegistry.java | 6 +-
      .../hcatalog/common/TestHiveClientCache.java | 37 +-
      .../hcatalog/mapreduce/HCatMapReduceTest.java | 3 +-
      .../mapreduce/TestHCatOutputFormat.java | 5 +-
      .../hcatalog/mapreduce/TestHCatPartitioned.java | 32 +-
      .../hcatalog/mapreduce/TestPassProperties.java | 2 +-
      hcatalog/hcatalog-pig-adapter/pom.xml | 2 +-
      .../apache/hive/hcatalog/pig/PigHCatUtil.java | 10 +-
      .../hcatalog/pig/TestHCatLoaderEncryption.java | 3 +-
      hcatalog/pom.xml | 2 +-
      hcatalog/server-extensions/pom.xml | 2 +-
      .../listener/DbNotificationListener.java | 6 +-
      .../hcatalog/listener/NotificationListener.java | 23 +-
      .../messaging/AlterPartitionMessage.java | 6 +-
      .../hive/hcatalog/messaging/InsertMessage.java | 7 +-
      .../hive/hcatalog/messaging/MessageFactory.java | 10 +-
      .../json/JSONAlterPartitionMessage.java | 14 +-
      .../messaging/json/JSONAlterTableMessage.java | 5 +
      .../messaging/json/JSONInsertMessage.java | 15 +-
      .../messaging/json/JSONMessageFactory.java | 14 +-
      .../listener/TestNotificationListener.java | 8 +-
      hcatalog/src/test/e2e/templeton/README.txt | 13 +
      hcatalog/src/test/e2e/templeton/build.xml | 29 +
      .../deployers/config/hive/hive-log4j.properties | 88 +
      .../deployers/config/hive/hive-site.mysql.xml | 77 +
      .../webhcat/webhcat-site.updateConfig.xml | 118 +
      .../deployers/config/webhcat/webhcat-site.xml | 9 +-
      .../templeton/deployers/deploy_e2e_artifacts.sh | 6 +
      .../src/test/e2e/templeton/deployers/env.sh | 7 +
      .../deployers/modify_webhcat_config.sh | 40 +
      .../deployers/restore_webhcat_config.sh | 36 +
      .../templeton/deployers/start_hive_services.sh | 7 +
      .../templeton/tests/modifyConfiguration.conf | 67 +
      hcatalog/streaming/pom.xml | 2 +-
      .../streaming/AbstractRecordWriter.java | 11 +-
      .../hive/hcatalog/streaming/HiveEndPoint.java | 9 +-
      hcatalog/webhcat/java-client/pom.xml | 2 +-
      .../apache/hive/hcatalog/api/HCatClient.java | 18 +
      .../hive/hcatalog/api/HCatClientHMSImpl.java | 22 +-
      .../hcatalog/api/HCatNotificationEvent.java | 2 +-
      .../apache/hive/hcatalog/api/HCatPartition.java | 12 +
      .../api/repl/HCatReplicationTaskIterator.java | 81 +-
      .../hcatalog/api/repl/NoopReplicationTask.java | 9 +-
      .../hive/hcatalog/api/repl/ReplicationTask.java | 103 +-
      .../hcatalog/api/repl/ReplicationUtils.java | 31 +-
      .../api/repl/commands/DropDatabaseCommand.java | 99 +
      .../api/repl/commands/DropPartitionCommand.java | 122 +
      .../api/repl/commands/DropTableCommand.java | 113 +
      .../api/repl/commands/ExportCommand.java | 131 +
      .../api/repl/commands/ImportCommand.java | 139 +
      .../hcatalog/api/repl/commands/NoopCommand.java | 22 +-
      .../repl/exim/AddPartitionReplicationTask.java | 111 +
      .../exim/AlterPartitionReplicationTask.java | 88 +
      .../repl/exim/AlterTableReplicationTask.java | 85 +
      .../exim/CreateDatabaseReplicationTask.java | 38 +
      .../repl/exim/CreateTableReplicationTask.java | 85 +
      .../repl/exim/DropDatabaseReplicationTask.java | 56 +
      .../repl/exim/DropPartitionReplicationTask.java | 77 +
      .../api/repl/exim/DropTableReplicationTask.java | 56 +
      .../repl/exim/EximReplicationTaskFactory.java | 63 +
      .../api/repl/exim/InsertReplicationTask.java | 99 +
      .../hive/hcatalog/api/TestHCatClient.java | 124 +-
      .../hcatalog/api/repl/CommandTestUtils.java | 107 +
      .../hcatalog/api/repl/TestReplicationTask.java | 92 +
      .../api/repl/commands/TestCommands.java | 590 ++
      .../api/repl/commands/TestNoopCommand.java | 42 +
      .../api/repl/exim/TestEximReplicationTasks.java | 600 ++
      hcatalog/webhcat/svr/pom.xml | 15 +-
      .../svr/src/main/config/webhcat-default.xml | 10 +-
      .../hcatalog/templeton/CompleteDelegator.java | 6 +-
      .../hcatalog/templeton/SecureProxySupport.java | 9 +-
      .../hcatalog/templeton/tool/LaunchMapper.java | 2 +-
      .../templeton/tool/TempletonControllerJob.java | 18 +-
      hwi/pom.xml | 2 +-
      itests/custom-serde/pom.xml | 2 +-
      itests/hcatalog-unit/pom.xml | 2 +-
      .../listener/TestDbNotificationListener.java | 75 +-
      itests/hive-jmh/pom.xml | 2 +-
      .../vectorization/VectorizationBench.java | 146 +-
      itests/hive-minikdc/pom.xml | 2 +-
      .../hive/minikdc/TestJdbcWithMiniKdcCookie.java | 102 +
      itests/hive-unit-hadoop2/pom.xml | 2 +-
      .../apache/hive/jdbc/TestSchedulerQueue.java | 3 +-
      itests/hive-unit/pom.xml | 131 +-
      .../org/apache/hive/jdbc/miniHS2/MiniHS2.java | 58 +-
      .../hive/metastore/TestHiveMetaStore.java | 30 +
      .../hadoop/hive/metastore/TestHiveMetaTool.java | 17 +-
      .../metastore/TestMetaStoreEventListener.java | 14 +-
      .../hive/beeline/TestBeeLineWithArgs.java | 12 +
      .../jdbc/TestJdbcWithLocalClusterSpark.java | 3 +-
      .../apache/hive/jdbc/TestJdbcWithMiniMr.java | 3 +-
      ...stMultiSessionsHS2WithLocalClusterSpark.java | 3 +-
      .../TestJdbcWithSQLAuthorization.java | 25 +
      .../operation/OperationLoggingAPITestBase.java | 235 +
      .../cli/operation/TestOperationLoggingAPI.java | 379 --
      .../TestOperationLoggingAPIWithMr.java | 168 +
      .../TestOperationLoggingAPIWithTez.java | 54 +
      .../cli/thrift/TestThriftHttpCLIService.java | 74 +-
      itests/pom.xml | 102 +-
      itests/qtest-spark/pom.xml | 2 +-
      itests/qtest/pom.xml | 2 +-
      .../test/resources/testconfiguration.properties | 41 +-
      itests/test-serde/pom.xml | 2 +-
      itests/util/pom.xml | 2 +-
      .../org/apache/hadoop/hive/ql/QTestUtil.java | 54 +-
      .../generic/GenericUDFTestGetJavaBoolean.java | 8 +-
      jdbc/pom.xml | 2 +-
      .../org/apache/hive/jdbc/HiveConnection.java | 101 +-
      .../hive/jdbc/HttpBasicAuthInterceptor.java | 49 +-
      .../jdbc/HttpKerberosRequestInterceptor.java | 62 +-
      .../hive/jdbc/HttpRequestInterceptorBase.java | 71 +
      jdbc/src/java/org/apache/hive/jdbc/Utils.java | 22 +-
      .../hive/jdbc/ZooKeeperHiveClientHelper.java | 2 +-
      metastore/bin/.gitignore | 3 +-
      metastore/dbs/derby/execute.sh | 37 +
      metastore/dbs/derby/prepare.sh | 63 +
      metastore/dbs/postgres/execute.sh | 29 +
      metastore/dbs/postgres/prepare.sh | 72 +
      metastore/pom.xml | 2 +-
      .../upgrade/derby/hive-schema-1.3.0.derby.sql | 336 +
      .../derby/upgrade-1.2.0-to-1.3.0.derby.sql | 3 +
      .../scripts/upgrade/derby/upgrade.order.derby | 1 +
      .../upgrade/mssql/006-HIVE-9456.mssql.sql | 323 +
      .../upgrade/mssql/hive-schema-1.2.0.mssql.sql | 256 +-
      .../upgrade/mssql/hive-schema-1.3.0.mssql.sql | 947 +++
      .../mssql/upgrade-1.1.0-to-1.2.0.mssql.sql | 1 +
      .../mssql/upgrade-1.2.0-to-1.3.0.mssql.sql | 5 +
      .../scripts/upgrade/mssql/upgrade.order.mssql | 1 +
      .../upgrade/mysql/021-HIVE-7018.mysql.sql | 53 -
      .../upgrade/mysql/hive-schema-1.2.0.mysql.sql | 10 +-
      .../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 840 +++
      .../mysql/upgrade-1.1.0-to-1.2.0.mysql.sql | 2 +-
      .../mysql/upgrade-1.2.0-to-1.3.0.mysql.sql | 4 +
      .../scripts/upgrade/mysql/upgrade.order.mysql | 1 +
      .../upgrade/oracle/hive-schema-1.3.0.oracle.sql | 788 +++
      .../oracle/upgrade-1.2.0-to-1.3.0.oracle.sql | 4 +
      .../scripts/upgrade/oracle/upgrade.order.oracle | 1 +
      .../postgres/hive-schema-1.3.0.postgres.sql | 1493 +++++
      .../upgrade-1.2.0-to-1.3.0.postgres.sql | 12 +
      .../upgrade/postgres/upgrade.order.postgres | 1 +
      .../hive/metastore/AggregateStatsCache.java | 575 ++
      .../hive/metastore/HiveMetaStoreClient.java | 26 +-
      .../hadoop/hive/metastore/IMetaStoreClient.java | 11 +
      .../hive/metastore/MetaStoreDirectSql.java | 67 +-
      .../hadoop/hive/metastore/MetaStoreUtils.java | 21 +
      .../hadoop/hive/metastore/ObjectStore.java | 105 +-
      .../hive/metastore/RetryingMetaStoreClient.java | 43 +-
      .../metastore/events/DropPartitionEvent.java | 14 +-
      .../hive/metastore/events/InsertEvent.java | 31 +-
      .../metastore/events/PreDropPartitionEvent.java | 23 +-
      .../hive/metastore/tools/HiveMetaTool.java | 35 +-
      .../hadoop/hive/metastore/txn/TxnHandler.java | 186 +-
      .../hive/metastore/TestAggregateStatsCache.java | 266 +
      .../hive/metastore/txn/TestTxnHandler.java | 1 +
      odbc/pom.xml | 2 +-
      packaging/pom.xml | 2 +-
      packaging/src/main/assembly/bin.xml | 1 +
      pom.xml | 25 +-
      ql/.gitignore | 2 +
      ql/pom.xml | 18 +-
      .../ExpressionTemplates/ColumnDivideColumn.txt | 26 +-
      .../java/org/apache/hadoop/hive/ql/Driver.java | 2 +-
      .../org/apache/hadoop/hive/ql/ErrorMsg.java | 1 +
      .../hive/ql/exec/CommonMergeJoinOperator.java | 50 +-
      .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 96 +-
      .../hadoop/hive/ql/exec/FetchOperator.java | 8 +-
      .../apache/hadoop/hive/ql/exec/FetchTask.java | 2 +
      .../hadoop/hive/ql/exec/FileSinkOperator.java | 47 +-
      .../hadoop/hive/ql/exec/FunctionRegistry.java | 16 +-
      .../hadoop/hive/ql/exec/HashTableLoader.java | 4 +-
      .../hadoop/hive/ql/exec/JoinOperator.java | 3 +-
      .../hadoop/hive/ql/exec/MapJoinOperator.java | 296 +-
      .../apache/hadoop/hive/ql/exec/ObjectCache.java | 7 +
      .../apache/hadoop/hive/ql/exec/Operator.java | 4 +
      .../hadoop/hive/ql/exec/OperatorFactory.java | 22 +-
      .../apache/hadoop/hive/ql/exec/RowSchema.java | 9 +
      .../hadoop/hive/ql/exec/SecureCmdDoAs.java | 5 +-
      .../ql/exec/SparkHashTableSinkOperator.java | 35 +-
      .../apache/hadoop/hive/ql/exec/Utilities.java | 54 +-
      .../hadoop/hive/ql/exec/mr/ExecDriver.java | 1 +
      .../hadoop/hive/ql/exec/mr/HashTableLoader.java | 2 +-
      .../hadoop/hive/ql/exec/mr/ObjectCache.java | 5 +
      .../persistence/BytesBytesMultiHashMap.java | 11 +-
      .../exec/persistence/HybridHashTableConf.java | 86 +
      .../persistence/HybridHashTableContainer.java | 351 +-
      .../ql/exec/persistence/KeyValueContainer.java | 31 +-
      .../persistence/MapJoinBytesTableContainer.java | 88 +-
      .../hive/ql/exec/persistence/MapJoinKey.java | 14 +
      .../MapJoinTableContainerDirectAccess.java | 31 +
      .../ql/exec/persistence/ObjectContainer.java | 31 +-
      .../ReusableGetAdaptorDirectAccess.java | 30 +
      .../hive/ql/exec/spark/HashTableLoader.java | 2 +-
      .../ql/exec/spark/HiveSparkClientFactory.java | 28 +-
      .../hive/ql/exec/spark/KryoSerializer.java | 4 +-
      .../ql/exec/spark/RemoteHiveSparkClient.java | 28 +-
      .../ql/exec/spark/SparkMapRecordHandler.java | 1 +
      .../hive/ql/exec/spark/SparkPlanGenerator.java | 51 +-
      .../ql/exec/spark/SparkReduceRecordHandler.java | 1 +
      .../hadoop/hive/ql/exec/spark/SparkTask.java | 16 +
      .../hive/ql/exec/spark/SparkUtilities.java | 27 +-
      .../spark/status/impl/JobMetricsListener.java | 12 +
      .../hadoop/hive/ql/exec/tez/DagUtils.java | 1 +
      .../hive/ql/exec/tez/HashTableLoader.java | 73 +-
      .../hive/ql/exec/tez/LlapObjectCache.java | 6 +
      .../hadoop/hive/ql/exec/tez/ObjectCache.java | 6 +
      .../hive/ql/exec/tez/ReduceRecordSource.java | 148 +-
      .../hadoop/hive/ql/exec/tez/TezJobMonitor.java | 4 +-
      .../hive/ql/exec/tez/TezSessionState.java | 13 +-
      .../apache/hadoop/hive/ql/exec/tez/TezTask.java | 3 +-
      .../vector/VectorAppMasterEventOperator.java | 123 +-
      .../ql/exec/vector/VectorColumnMapping.java | 6 +-
      .../ql/exec/vector/VectorColumnOrderedMap.java | 17 +-
      .../exec/vector/VectorColumnOutputMapping.java | 4 +
      .../ql/exec/vector/VectorColumnSetInfo.java | 3 +-
      .../exec/vector/VectorColumnSourceMapping.java | 4 +
      .../hive/ql/exec/vector/VectorCopyRow.java | 2 +-
      .../ql/exec/vector/VectorDeserializeRow.java | 13 +-
      .../ql/exec/vector/VectorFileSinkOperator.java | 78 +-
      .../ql/exec/vector/VectorGroupByOperator.java | 26 +-
      .../ql/exec/vector/VectorMapJoinOperator.java | 64 +-
      .../exec/vector/VectorReduceSinkOperator.java | 83 +-
      .../exec/vector/VectorSMBMapJoinOperator.java | 21 +-
      .../hive/ql/exec/vector/VectorSerializeRow.java | 17 +
      .../exec/vector/VectorSerializeRowNoNulls.java | 17 +
      .../ql/exec/vector/VectorizationContext.java | 38 +-
      .../ql/exec/vector/VectorizedBatchUtil.java | 49 +-
      .../ql/exec/vector/VectorizedRowBatchCtx.java | 4 +-
      .../expressions/ConstantVectorExpression.java | 3 -
      .../vector/expressions/IdentityExpression.java | 9 +
      .../ql/exec/vector/expressions/MathExpr.java | 9 +-
      .../VectorExpressionWriterFactory.java | 29 +-
      .../mapjoin/VectorMapJoinCommonOperator.java | 772 +++
      .../VectorMapJoinGenerateResultOperator.java | 824 +++
      ...pJoinInnerBigOnlyGenerateResultOperator.java | 343 +
      .../VectorMapJoinInnerBigOnlyLongOperator.java | 383 ++
      ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 396 ++
      ...VectorMapJoinInnerBigOnlyStringOperator.java | 372 ++
      ...ectorMapJoinInnerGenerateResultOperator.java | 251 +
      .../mapjoin/VectorMapJoinInnerLongOperator.java | 382 ++
      .../VectorMapJoinInnerMultiKeyOperator.java | 394 ++
      .../VectorMapJoinInnerStringOperator.java | 371 ++
      ...orMapJoinLeftSemiGenerateResultOperator.java | 233 +
      .../VectorMapJoinLeftSemiLongOperator.java | 371 ++
      .../VectorMapJoinLeftSemiMultiKeyOperator.java | 383 ++
      .../VectorMapJoinLeftSemiStringOperator.java | 356 +
      ...ectorMapJoinOuterGenerateResultOperator.java | 618 ++
      .../mapjoin/VectorMapJoinOuterLongOperator.java | 380 ++
      .../VectorMapJoinOuterMultiKeyOperator.java | 402 ++
      .../VectorMapJoinOuterStringOperator.java | 372 ++
      .../mapjoin/VectorMapJoinRowBytesContainer.java | 318 +
      .../fast/VectorMapJoinFastBytesHashMap.java | 101 +
      .../VectorMapJoinFastBytesHashMultiSet.java | 93 +
      .../fast/VectorMapJoinFastBytesHashSet.java | 85 +
      .../fast/VectorMapJoinFastBytesHashTable.java | 221 +
      .../fast/VectorMapJoinFastBytesHashUtil.java | 41 +
      .../mapjoin/fast/VectorMapJoinFastHashMap.java | 38 +
      .../fast/VectorMapJoinFastHashMultiSet.java | 48 +
      .../mapjoin/fast/VectorMapJoinFastHashSet.java | 44 +
      .../fast/VectorMapJoinFastHashTable.java | 68 +
      .../fast/VectorMapJoinFastHashTableLoader.java | 114 +
      .../fast/VectorMapJoinFastIntHashUtil.java | 32 +
      .../mapjoin/fast/VectorMapJoinFastKeyStore.java | 173 +
      .../fast/VectorMapJoinFastLongHashMap.java | 94 +
      .../fast/VectorMapJoinFastLongHashMultiSet.java | 91 +
      .../fast/VectorMapJoinFastLongHashSet.java | 84 +
      .../fast/VectorMapJoinFastLongHashTable.java | 284 +
      .../fast/VectorMapJoinFastLongHashUtil.java | 63 +
      .../fast/VectorMapJoinFastMultiKeyHashMap.java | 39 +
      .../VectorMapJoinFastMultiKeyHashMultiSet.java | 32 +
      .../fast/VectorMapJoinFastMultiKeyHashSet.java | 32 +
      .../fast/VectorMapJoinFastStringCommon.java | 67 +
      .../fast/VectorMapJoinFastStringHashMap.java | 44 +
      .../VectorMapJoinFastStringHashMultiSet.java | 44 +
      .../fast/VectorMapJoinFastStringHashSet.java | 44 +
      .../fast/VectorMapJoinFastTableContainer.java | 222 +
      .../fast/VectorMapJoinFastValueStore.java | 557 ++
      .../hashtable/VectorMapJoinBytesHashMap.java | 51 +
      .../VectorMapJoinBytesHashMultiSet.java | 51 +
      .../hashtable/VectorMapJoinBytesHashSet.java | 51 +
      .../hashtable/VectorMapJoinBytesHashTable.java | 26 +
      .../mapjoin/hashtable/VectorMapJoinHashMap.java | 34 +
      .../hashtable/VectorMapJoinHashMapResult.java | 63 +
      .../hashtable/VectorMapJoinHashMultiSet.java | 31 +
      .../VectorMapJoinHashMultiSetResult.java | 34 +
      .../mapjoin/hashtable/VectorMapJoinHashSet.java | 34 +
      .../hashtable/VectorMapJoinHashSetResult.java | 28 +
      .../hashtable/VectorMapJoinHashTable.java | 43 +
      .../hashtable/VectorMapJoinHashTableResult.java | 81 +
      .../hashtable/VectorMapJoinLongHashMap.java | 46 +
      .../VectorMapJoinLongHashMultiSet.java | 46 +
      .../hashtable/VectorMapJoinLongHashSet.java | 46 +
      .../hashtable/VectorMapJoinLongHashTable.java | 31 +
      .../hashtable/VectorMapJoinTableContainer.java | 28 +
      .../VectorMapJoinOptimizedCreateHashTable.java | 129 +
      .../VectorMapJoinOptimizedHashMap.java | 128 +
      .../VectorMapJoinOptimizedHashMultiSet.java | 103 +
      .../VectorMapJoinOptimizedHashSet.java | 78 +
      .../VectorMapJoinOptimizedHashTable.java | 95 +
      .../VectorMapJoinOptimizedLongCommon.java | 171 +
      .../VectorMapJoinOptimizedLongHashMap.java | 82 +
      .../VectorMapJoinOptimizedLongHashMultiSet.java | 83 +
      .../VectorMapJoinOptimizedLongHashSet.java | 83 +
      .../VectorMapJoinOptimizedMultiKeyHashMap.java | 36 +
      ...torMapJoinOptimizedMultiKeyHashMultiSet.java | 36 +
      .../VectorMapJoinOptimizedMultiKeyHashSet.java | 36 +
      .../VectorMapJoinOptimizedStringCommon.java | 98 +
      .../VectorMapJoinOptimizedStringHashMap.java | 63 +
      ...ectorMapJoinOptimizedStringHashMultiSet.java | 64 +
      .../VectorMapJoinOptimizedStringHashSet.java | 63 +
      .../hadoop/hive/ql/history/HiveHistory.java | 3 +-
      .../hive/ql/io/CombineHiveInputFormat.java | 4 +
      .../hadoop/hive/ql/io/filters/BloomFilter.java | 298 -
      .../hive/ql/io/filters/BloomFilterIO.java | 44 +
      .../hadoop/hive/ql/io/filters/Murmur3.java | 334 -
      .../hive/ql/io/orc/ColumnStatisticsImpl.java | 19 +-
      .../hive/ql/io/orc/DateColumnStatistics.java | 6 +-
      .../apache/hadoop/hive/ql/io/orc/FileDump.java | 8 +-
      .../hadoop/hive/ql/io/orc/IntegerWriter.java | 5 -
      .../apache/hadoop/hive/ql/io/orc/OrcFile.java | 4 +-
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 234 +-
      .../hive/ql/io/orc/RecordReaderUtils.java | 12 +-
      .../hive/ql/io/orc/RunLengthIntegerWriter.java | 9 +-
      .../ql/io/orc/RunLengthIntegerWriterV2.java | 11 +-
      .../hive/ql/io/orc/TreeReaderFactory.java | 98 +-
      .../hadoop/hive/ql/io/orc/WriterImpl.java | 15 +-
      .../ql/io/parquet/MapredParquetInputFormat.java | 7 +-
      .../io/parquet/MapredParquetOutputFormat.java | 7 +-
      .../parquet/VectorizedParquetInputFormat.java | 2 +-
      .../read/ParquetRecordReaderWrapper.java | 7 +-
      .../write/ParquetRecordWriterWrapper.java | 7 +-
      .../hive/ql/io/sarg/SearchArgumentImpl.java | 85 +-
      .../hadoop/hive/ql/lib/PreOrderWalker.java | 8 +
      .../hadoop/hive/ql/lockmgr/DbLockManager.java | 41 +-
      .../hadoop/hive/ql/lockmgr/DbTxnManager.java | 22 +-
      .../apache/hadoop/hive/ql/metadata/Hive.java | 26 +-
      .../apache/hadoop/hive/ql/metadata/Table.java | 18 +-
      .../BucketingSortingReduceSinkOptimizer.java | 13 +
      .../ql/optimizer/ColumnPrunerProcFactory.java | 25 +-
      .../optimizer/ConstantPropagateProcFactory.java | 13 +-
      .../hive/ql/optimizer/ConvertJoinMapJoin.java | 2 +
      .../ql/optimizer/LimitPushdownOptimizer.java | 9 +-
      .../ql/optimizer/NonBlockingOpDeDupProc.java | 2 +-
      .../hadoop/hive/ql/optimizer/Optimizer.java | 18 +-
      .../ql/optimizer/calcite/HiveCalciteUtil.java | 398 +-
      .../ql/optimizer/calcite/HiveConfigContext.java | 37 +
      .../calcite/HiveDefaultRelMetadataProvider.java | 55 +-
      .../ql/optimizer/calcite/HiveRelCollation.java | 16 +
      .../optimizer/calcite/HiveRelDistribution.java | 80 +
      .../ql/optimizer/calcite/HiveRelOptUtil.java | 414 ++
      .../ql/optimizer/calcite/RelOptHiveTable.java | 217 +-
      .../calcite/cost/HiveAlgorithmsConf.java | 39 +
      .../calcite/cost/HiveAlgorithmsUtil.java | 363 ++
      .../ql/optimizer/calcite/cost/HiveCost.java | 40 +-
      .../optimizer/calcite/cost/HiveCostModel.java | 102 +
      .../ql/optimizer/calcite/cost/HiveCostUtil.java | 43 -
      .../calcite/cost/HiveDefaultCostModel.java | 123 +
      .../calcite/cost/HiveOnTezCostModel.java | 635 ++
      .../optimizer/calcite/cost/HiveRelMdCost.java | 71 +
      .../calcite/cost/HiveVolcanoPlanner.java | 9 +-
      .../calcite/reloperators/HiveAggregate.java | 10 +-
      .../calcite/reloperators/HiveFilter.java | 4 +-
      .../calcite/reloperators/HiveJoin.java | 151 +-
      .../calcite/reloperators/HiveLimit.java | 4 +-
      .../calcite/reloperators/HiveProject.java | 4 +-
      .../calcite/reloperators/HiveSortExchange.java | 66 +
      .../calcite/reloperators/HiveTableScan.java | 126 +-
      .../rules/HiveInsertExchange4JoinRule.java | 134 +
      .../calcite/rules/HiveJoinAddNotNullRule.java | 197 +
      .../calcite/rules/HiveJoinToMultiJoinRule.java | 333 +
      .../calcite/stats/HiveRelMdCollation.java | 67 +
      .../calcite/stats/HiveRelMdDistribution.java | 56 +
      .../calcite/stats/HiveRelMdMemory.java | 102 +
      .../calcite/stats/HiveRelMdParallelism.java | 117 +
      .../calcite/stats/HiveRelMdRowCount.java | 1 -
      .../optimizer/calcite/stats/HiveRelMdSize.java | 148 +
      .../calcite/stats/HiveRelMdUniqueKeys.java | 1 -
      .../calcite/translator/ASTBuilder.java | 3 +-
      .../calcite/translator/ASTConverter.java | 24 +-
      .../calcite/translator/ExprNodeConverter.java | 245 +-
      .../calcite/translator/HiveGBOpConvUtil.java | 1226 ++++
      .../calcite/translator/HiveOpConverter.java | 988 +++
      .../translator/HiveOpConverterPostProc.java | 165 +
      .../translator/PlanModifierForASTConv.java | 111 +-
      .../translator/PlanModifierForReturnPath.java | 41 +
      .../calcite/translator/PlanModifierUtil.java | 125 +
      .../calcite/translator/TypeConverter.java | 4 +-
      .../physical/GenSparkSkewJoinProcessor.java | 9 +-
      .../physical/SparkMapJoinResolver.java | 185 +-
      .../hive/ql/optimizer/physical/Vectorizer.java | 356 +-
      .../spark/SparkSkewJoinProcFactory.java | 57 +-
      .../optimizer/spark/SparkSkewJoinResolver.java | 5 +-
      .../stats/annotation/StatsRulesProcFactory.java | 4 +-
      .../hive/ql/parse/BaseSemanticAnalyzer.java | 96 +-
      .../hadoop/hive/ql/parse/CalcitePlanner.java | 283 +-
      .../ql/parse/ColumnStatsSemanticAnalyzer.java | 24 +-
      .../hive/ql/parse/DDLSemanticAnalyzer.java | 159 +-
      .../apache/hadoop/hive/ql/parse/EximUtil.java | 173 +-
      .../hive/ql/parse/ExportSemanticAnalyzer.java | 121 +-
      .../hadoop/hive/ql/parse/GenTezProcContext.java | 5 +-
      .../hadoop/hive/ql/parse/GenTezUtils.java | 5 +-
      .../apache/hadoop/hive/ql/parse/GenTezWork.java | 40 +-
      .../hadoop/hive/ql/parse/GenTezWorkWalker.java | 6 +
      .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 2 +
      .../apache/hadoop/hive/ql/parse/HiveParser.g | 51 +-
      .../hadoop/hive/ql/parse/IdentifiersParser.g | 8 +-
      .../hive/ql/parse/ImportSemanticAnalyzer.java | 768 ++-
      .../hive/ql/parse/MetaDataExportListener.java | 2 +-
      .../org/apache/hadoop/hive/ql/parse/QB.java | 10 +-
      .../hadoop/hive/ql/parse/ReplicationSpec.java | 314 +
      .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 151 +-
      .../hive/ql/parse/TypeCheckProcFactory.java | 2 +-
      .../hadoop/hive/ql/parse/UnparseTranslator.java | 2 +-
      .../ql/parse/UpdateDeleteSemanticAnalyzer.java | 13 +-
      .../hadoop/hive/ql/parse/WindowingSpec.java | 7 +-
      .../hive/ql/parse/spark/GenSparkUtils.java | 33 +-
      .../hadoop/hive/ql/plan/AddPartitionDesc.java | 15 +
      .../apache/hadoop/hive/ql/plan/BaseWork.java | 1 +
      .../hadoop/hive/ql/plan/CreateTableDesc.java | 14 +
      .../hadoop/hive/ql/plan/DropTableDesc.java | 23 +-
      .../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 39 +
      .../apache/hadoop/hive/ql/plan/JoinDesc.java | 19 +
      .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 29 +-
      .../apache/hadoop/hive/ql/plan/PlanUtils.java | 46 +-
      .../hadoop/hive/ql/plan/ReduceSinkDesc.java | 12 +
      .../apache/hadoop/hive/ql/plan/SparkWork.java | 11 +-
      .../hadoop/hive/ql/plan/VectorMapJoinDesc.java | 107 +
      .../AuthorizationPreEventListener.java | 68 +-
      ...MultiPartitionAuthorizationProviderBase.java | 43 +
      .../MetaStoreAuthzAPIAuthorizerEmbedOnly.java | 3 +-
      .../StorageBasedAuthorizationProvider.java | 125 +-
      .../hadoop/hive/ql/session/SessionState.java | 29 +-
      .../hadoop/hive/ql/txn/compactor/Cleaner.java | 18 +-
      .../hadoop/hive/ql/txn/compactor/Worker.java | 30 +-
      .../org/apache/hadoop/hive/ql/udf/UDFMd5.java | 79 +
      .../hadoop/hive/ql/udf/generic/GenericUDF.java | 15 +-
      .../ql/udf/generic/GenericUDFAddMonths.java | 5 +-
      .../hive/ql/udf/generic/GenericUDFArray.java | 5 +-
      .../hive/ql/udf/generic/GenericUDFCbrt.java | 3 +-
      .../ql/udf/generic/GenericUDFDateFormat.java | 125 +
      .../hive/ql/udf/generic/GenericUDFDecode.java | 24 +-
      .../ql/udf/generic/GenericUDFFactorial.java | 3 +-
      .../hive/ql/udf/generic/GenericUDFLastDay.java | 4 +-
      .../ql/udf/generic/GenericUDFLevenshtein.java | 5 +-
      .../hive/ql/udf/generic/GenericUDFMap.java | 20 +-
      .../hive/ql/udf/generic/GenericUDFNextDay.java | 11 +-
      .../hive/ql/udf/generic/GenericUDFQuarter.java | 85 +
      .../hive/ql/udf/generic/GenericUDFSoundex.java | 6 +-
      .../ql/udf/generic/GenericUDFTimestamp.java | 24 +
      .../hive/ql/udf/generic/GenericUDFTrunc.java | 11 +-
      .../apache/hadoop/hive/ql/TestTxnCommands2.java | 59 +-
      .../session/TestSparkSessionManagerImpl.java | 5 +
      .../exec/vector/mapjoin/TestDebugDisplay.java | 63 +
      .../mapjoin/fast/CommonFastHashTable.java | 128 +
      .../mapjoin/fast/RandomByteArrayStream.java | 92 +
      .../vector/mapjoin/fast/RandomLongStream.java | 49 +
      .../fast/TestVectorMapJoinFastLongHashMap.java | 219 +
      .../TestVectorMapJoinFastMultiKeyHashMap.java | 231 +
      .../TestVectorMapJoinRowBytesContainer.java | 74 +
      .../hive/ql/io/filters/TestBloomFilter.java | 458 --
      .../hadoop/hive/ql/io/filters/TestMurmur3.java | 189 -
      .../hive/ql/io/orc/TestColumnStatistics.java | 20 +-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 105 +-
      .../hive/ql/io/orc/TestOrcSerDeStats.java | 12 +-
      .../hadoop/hive/ql/io/orc/TestOrcTimezone1.java | 4 +-
      .../hadoop/hive/ql/io/orc/TestOrcTimezone2.java | 2 +-
      .../hive/ql/io/orc/TestRecordReaderImpl.java | 328 +-
      .../io/parquet/AbstractTestParquetDirect.java | 5 +-
      .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 104 +-
      .../hive/ql/lockmgr/TestDbTxnManager.java | 2 +
      .../hive/ql/lockmgr/TestDbTxnManager2.java | 233 +
      .../ql/optimizer/physical/TestVectorizer.java | 2 +-
      ...tedCharsInColumnNameCreateTableNegative.java | 87 +
      .../hadoop/hive/ql/udf/TestGenericUDFDate.java | 92 -
      .../hive/ql/udf/TestGenericUDFDateAdd.java | 145 -
      .../hive/ql/udf/TestGenericUDFDateDiff.java | 116 -
      .../hive/ql/udf/TestGenericUDFDateSub.java | 143 -
      .../hadoop/hive/ql/udf/TestGenericUDFUtils.java | 58 -
      .../apache/hadoop/hive/ql/udf/TestUDFMd5.java | 57 +
      .../hive/ql/udf/generic/TestGenericUDFDate.java | 92 +
      .../ql/udf/generic/TestGenericUDFDateAdd.java | 143 +
      .../ql/udf/generic/TestGenericUDFDateDiff.java | 116 +
      .../udf/generic/TestGenericUDFDateFormat.java | 173 +
      .../ql/udf/generic/TestGenericUDFDateSub.java | 143 +
      .../udf/generic/TestGenericUDFLevenshtein.java | 4 +-
      .../ql/udf/generic/TestGenericUDFNextDay.java | 4 +-
      .../ql/udf/generic/TestGenericUDFQuarter.java | 182 +
      .../ql/udf/generic/TestGenericUDFUtils.java | 57 +
      .../authorization_set_nonexistent_conf.q | 7 +
      .../queries/clientnegative/insertsel_fail.q | 1 +
      .../clientnegative/protectmode_part_no_drop2.q | 11 +
      .../clientpositive/alter_partition_coltype.q | 1 +
      .../clientpositive/annotate_stats_part.q | 1 +
      .../clientpositive/auto_sortmerge_join_13.q | 2 +
      ql/src/test/queries/clientpositive/cbo_join.q | 1 +
      ql/src/test/queries/clientpositive/cbo_limit.q | 2 +-
      .../queries/clientpositive/cbo_rp_auto_join0.q | 24 +
      .../queries/clientpositive/cbo_rp_auto_join1.q | 274 +
      .../queries/clientpositive/cbo_simple_select.q | 3 +-
      .../test/queries/clientpositive/cbo_subq_in.q | 8 +-
      .../queries/clientpositive/cbo_subq_not_in.q | 4 +-
      .../test/queries/clientpositive/cbo_udf_udaf.q | 12 +-
      .../queries/clientpositive/decimal_precision2.q | 15 +
      .../test/queries/clientpositive/explainuser_2.q | 22 +
      .../clientpositive/hybridgrace_hashjoin_1.q | 258 +
      .../clientpositive/hybridgrace_hashjoin_2.q | 152 +
      .../queries/clientpositive/hybridhashjoin.q | 250 -
      .../clientpositive/insert_overwrite_directory.q | 141 +
      .../queries/clientpositive/join_on_varchar.q | 12 +
      .../queries/clientpositive/limit_pushdown.q | 4 +
      ql/src/test/queries/clientpositive/mergejoin.q | 107 +
      .../queries/clientpositive/optimize_nullscan.q | 4 +
      .../clientpositive/partition_coltype_literals.q | 75 +
      .../test/queries/clientpositive/repl_1_drop.q | 84 +
      .../queries/clientpositive/repl_2_exim_basic.q | 79 +
      .../clientpositive/repl_3_exim_metadata.q | 40 +
      .../runtime_skewjoin_mapjoin_spark.q | 24 +
      .../test/queries/clientpositive/tez_join_hash.q | 2 +
      ql/src/test/queries/clientpositive/tez_smb_1.q | 1 +
      .../test/queries/clientpositive/tez_smb_main.q | 7 +
      .../clientpositive/tez_union_multiinsert.q | 120 +
      .../clientpositive/timestamp_ints_casts.q | 74 +
      .../queries/clientpositive/udf_date_format.q | 60 +
      ql/src/test/queries/clientpositive/udf_md5.q | 13 +
      .../test/queries/clientpositive/udf_quarter.q | 100 +
      ql/src/test/queries/clientpositive/union12.q | 2 +
      ql/src/test/queries/clientpositive/union17.q | 2 +-
      ql/src/test/queries/clientpositive/union20.q | 2 +-
      ql/src/test/queries/clientpositive/union21.q | 2 +-
      ql/src/test/queries/clientpositive/union27.q | 2 +-
      .../queries/clientpositive/union_remove_22.q | 2 +
      .../clientpositive/union_remove_6_subq.q | 2 +
      .../queries/clientpositive/vector_aggregate_9.q | 2 +
      .../clientpositive/vector_binary_join_groupby.q | 55 +
      .../clientpositive/vector_char_mapjoin1.q | 9 +
      .../clientpositive/vector_decimal_mapjoin.q | 43 +-
      .../queries/clientpositive/vector_inner_join.q | 61 +
      .../clientpositive/vector_left_outer_join.q | 1 +
      .../vector_mr_diff_schema_alias.q | 115 +
      .../queries/clientpositive/vector_outer_join0.q | 25 +
      .../queries/clientpositive/vector_outer_join1.q | 64 +
      .../queries/clientpositive/vector_outer_join2.q | 39 +
      .../queries/clientpositive/vector_outer_join3.q | 80 +
      .../queries/clientpositive/vector_outer_join4.q | 66 +
      .../queries/clientpositive/vectorized_casts.q | 2 +-
      .../queries/clientpositive/vectorized_context.q | 1 +
      .../vectorized_timestamp_ints_casts.q | 76 +
      .../queries/clientpositive/windowing_navfn.q | 4 +-
      .../clientpositive/windowing_windowspec.q | 2 +
      ql/src/test/resources/orc-file-has-null.out | 42 +-
      .../alter_rename_partition_failure3.q.out | 2 +-
      .../alter_table_add_partition.q.out | 2 +-
      .../clientnegative/alter_view_failure5.q.out | 2 +-
      .../clientnegative/alter_view_failure7.q.out | 2 +-
      .../clientnegative/archive_partspec1.q.out | 2 +-
      .../clientnegative/archive_partspec5.q.out | 8 +-
      .../authorization_set_nonexistent_conf.q.out | 9 +
      .../results/clientnegative/insertsel_fail.q.out | 1 +
      .../protectmode_part_no_drop2.q.out | 51 +
      ql/src/test/results/clientnegative/touch2.q.out | 2 +-
      .../truncate_partition_column.q.out | 2 +-
      .../clientnegative/udf_add_months_error_1.q.out | 2 +-
      .../clientnegative/udf_last_day_error_1.q.out | 2 +-
      .../clientnegative/udf_next_day_error_1.q.out | 2 +-
      .../clientnegative/udf_next_day_error_2.q.out | 2 +-
      .../annotate_stats_join_pkfk.q.out | 20 +-
      .../clientpositive/annotate_stats_select.q.out | 2 +-
      .../clientpositive/auto_sortmerge_join_1.q.out | 3 +
      .../clientpositive/auto_sortmerge_join_2.q.out | 2 +
      .../clientpositive/auto_sortmerge_join_3.q.out | 3 +
      .../clientpositive/auto_sortmerge_join_4.q.out | 3 +
      .../clientpositive/auto_sortmerge_join_5.q.out | 3 +
      .../clientpositive/auto_sortmerge_join_7.q.out | 3 +
      .../clientpositive/auto_sortmerge_join_8.q.out | 3 +
      .../clientpositive/bucketcontext_1.q.out | 1 +
      .../clientpositive/bucketcontext_2.q.out | 1 +
      .../clientpositive/bucketcontext_3.q.out | 1 +
      .../clientpositive/bucketcontext_4.q.out | 1 +
      .../clientpositive/bucketcontext_5.q.out | 1 +
      .../clientpositive/bucketcontext_6.q.out | 1 +
      .../clientpositive/bucketcontext_7.q.out | 1 +
      .../clientpositive/bucketcontext_8.q.out | 1 +
      .../test/results/clientpositive/cbo_join.q.out | 350 +-
      .../test/results/clientpositive/cbo_limit.q.out | 4 +-
      .../clientpositive/cbo_rp_auto_join0.q.out | 258 +
      .../clientpositive/cbo_rp_auto_join1.q.out | 1512 +++++
      .../clientpositive/cbo_simple_select.q.out | 10 +
      .../results/clientpositive/cbo_subq_in.q.out | 38 +-
      .../clientpositive/cbo_subq_not_in.q.out | 32 +-
      .../results/clientpositive/cbo_udf_udaf.q.out | 34 +-
      .../clientpositive/correlationoptimizer12.q.out | 8 +-
      .../results/clientpositive/ctas_colname.q.out | 8 +-
      .../clientpositive/decimal_precision2.q.out | 163 +
      .../results/clientpositive/decimal_udf.q.out | 2 +-
      .../groupby_grouping_window.q.out | 4 +-
      .../clientpositive/groupby_resolution.q.out | 4 +-
      ql/src/test/results/clientpositive/input8.q.out | 2 +-
      ql/src/test/results/clientpositive/input9.q.out | 8 +-
      .../insert_overwrite_directory.q.out | 1813 ++++++
      ql/src/test/results/clientpositive/join32.q.out | 84 +-
      .../clientpositive/join32_lessSize.q.out | 423 +-
      ql/src/test/results/clientpositive/join33.q.out | 84 +-
      .../clientpositive/join_alt_syntax.q.out | 306 +-
      .../clientpositive/join_cond_pushdown_2.q.out | 150 +-
      .../clientpositive/join_cond_pushdown_4.q.out | 150 +-
      .../clientpositive/join_on_varchar.q.out | 146 +
      .../results/clientpositive/limit_pushdown.q.out | 88 +
      .../clientpositive/literal_decimal.q.out | 6 +-
      .../clientpositive/load_dyn_part14.q.out | 24 +-
      .../test/results/clientpositive/mergejoin.q.out | 2567 ++++++++
      .../clientpositive/num_op_type_conv.q.out | 6 +-
      .../clientpositive/optimize_nullscan.q.out | 147 +
      .../partition_coltype_literals.q.out | 647 ++
      .../clientpositive/partition_timestamp.q.out | 116 +-
      .../clientpositive/partition_timestamp2.q.out | 250 +-
      .../clientpositive/ppd_constant_expr.q.out | 4 +-
      ql/src/test/results/clientpositive/ptf.q.out | 160 +-
      .../results/clientpositive/ptf_streaming.q.out | 84 +-
      .../results/clientpositive/quotedid_basic.q.out | 8 +-
      .../results/clientpositive/repl_1_drop.q.out | 345 +
      .../clientpositive/repl_2_exim_basic.q.out | 494 ++
      .../clientpositive/repl_3_exim_metadata.q.out | 222 +
      .../runtime_skewjoin_mapjoin_spark.q.out | 669 ++
      .../results/clientpositive/show_functions.q.out | 5 +
      .../results/clientpositive/smb_mapjoin9.q.out | 2 +
      .../results/clientpositive/smb_mapjoin_11.q.out | 1 +
      .../results/clientpositive/smb_mapjoin_12.q.out | 2 +
      .../results/clientpositive/smb_mapjoin_13.q.out | 1 +
      .../results/clientpositive/smb_mapjoin_15.q.out | 3 +
      .../clientpositive/sort_merge_join_desc_5.q.out | 1 +
      .../clientpositive/spark/auto_join0.q.out | 2 +-
      .../clientpositive/spark/auto_join15.q.out | 2 +-
      .../clientpositive/spark/auto_join20.q.out | 4 +-
      .../clientpositive/spark/auto_join21.q.out | 2 +-
      .../clientpositive/spark/auto_join23.q.out | 2 +-
      .../clientpositive/spark/auto_join28.q.out | 8 +-
      .../clientpositive/spark/auto_join29.q.out | 18 +-
      .../clientpositive/spark/auto_join30.q.out | 42 +-
      .../clientpositive/spark/auto_join31.q.out | 6 +-
      .../clientpositive/spark/auto_join8.q.out | 2 +-
      .../spark/auto_sortmerge_join_1.q.out | 2 +
      .../spark/auto_sortmerge_join_2.q.out | 1 +
      .../spark/auto_sortmerge_join_3.q.out | 2 +
      .../spark/auto_sortmerge_join_4.q.out | 2 +
      .../spark/auto_sortmerge_join_5.q.out | 2 +
      .../spark/auto_sortmerge_join_7.q.out | 2 +
      .../spark/auto_sortmerge_join_8.q.out | 2 +
      .../results/clientpositive/spark/bucket5.q.out | 2 +-
      .../results/clientpositive/spark/cbo_gby.q.out | 120 +
      .../clientpositive/spark/cbo_limit.q.out | 90 +
      .../clientpositive/spark/cbo_semijoin.q.out | 440 ++
      .../spark/cbo_simple_select.q.out | 755 +++
      .../clientpositive/spark/cbo_stats.q.out | 14 +
      .../clientpositive/spark/cbo_subq_in.q.out | 149 +
      .../clientpositive/spark/cbo_subq_not_in.q.out | 365 ++
      .../clientpositive/spark/cbo_udf_udaf.q.out | 121 +
      .../clientpositive/spark/cbo_union.q.out | 920 +++
      .../results/clientpositive/spark/ctas.q.out | 20 +-
      .../spark/escape_clusterby1.q.out | 4 +-
      .../clientpositive/spark/escape_sortby1.q.out | 4 +-
      .../clientpositive/spark/groupby10.q.out | 2 +-
      .../groupby7_map_multi_single_reducer.q.out | 2 +-
      .../groupby7_noskew_multi_single_reducer.q.out | 4 +-
      .../clientpositive/spark/groupby8_map.q.out | 2 +-
      .../clientpositive/spark/groupby8_noskew.q.out | 2 +-
      .../spark/groupby_multi_single_reducer3.q.out | 8 +-
      .../spark/identity_project_remove_skip.q.out | 4 +-
      .../results/clientpositive/spark/input14.q.out | 2 +-
      .../results/clientpositive/spark/input17.q.out | 2 +-
      .../results/clientpositive/spark/input18.q.out | 2 +-
      .../clientpositive/spark/join0.q.java1.7.out | 2 +-
      .../clientpositive/spark/join0.q.java1.8.out | 238 +
      .../results/clientpositive/spark/join15.q.out | 2 +-
      .../results/clientpositive/spark/join20.q.out | 4 +-
      .../results/clientpositive/spark/join21.q.out | 2 +-
      .../results/clientpositive/spark/join23.q.out | 2 +-
      .../results/clientpositive/spark/join32.q.out | 88 +-
      .../clientpositive/spark/join32_lessSize.q.out | 286 +-
      .../results/clientpositive/spark/join33.q.out | 88 +-
      .../results/clientpositive/spark/join40.q.out | 4 +-
      .../results/clientpositive/spark/join8.q.out | 2 +-
      .../clientpositive/spark/join_alt_syntax.q.out | 210 +-
      .../spark/join_cond_pushdown_2.q.out | 98 +-
      .../spark/join_cond_pushdown_4.q.out | 98 +-
      .../clientpositive/spark/limit_pushdown.q.out | 94 +
      .../spark/list_bucket_dml_2.q.java1.8.out | 665 ++
      .../clientpositive/spark/load_dyn_part14.q.out | 8 +-
      .../clientpositive/spark/mapjoin_decimal.q.out | 8 +-
      .../spark/mapjoin_filter_on_outerjoin.q.out | 4 +-
      .../spark/mapjoin_test_outer.q.out | 4 +-
      .../clientpositive/spark/multi_insert.q.out | 8 +-
      .../clientpositive/spark/multi_insert_gby.q.out | 4 +-
      .../spark/multi_insert_gby3.q.out | 6 +-
      .../spark/multi_insert_lateral_view.q.out | 2 +-
      ...i_insert_move_tasks_share_dependencies.q.out | 72 +-
      .../spark/multigroupby_singlemr.q.out | 2 +-
      .../spark/optimize_nullscan.q.out | 153 +
      .../results/clientpositive/spark/parallel.q.out | 2 +-
      .../clientpositive/spark/parallel_join0.q.out | 2 +-
      .../clientpositive/spark/ppd_join4.q.out | 2 +-
      .../clientpositive/spark/ppd_transform.q.out | 4 +-
      .../test/results/clientpositive/spark/ptf.q.out | 194 +-
      .../clientpositive/spark/ptf_streaming.q.out | 100 +-
      .../spark/reduce_deduplicate.q.out | 2 +-
      .../spark/reduce_deduplicate_exclude_join.q.out | 2 +-
      .../spark/runtime_skewjoin_mapjoin_spark.q.out | 314 +
      .../results/clientpositive/spark/semijoin.q.out | 40 +-
      .../clientpositive/spark/smb_mapjoin_11.q.out | 1 +
      .../clientpositive/spark/smb_mapjoin_12.q.out | 2 +
      .../clientpositive/spark/smb_mapjoin_13.q.out | 1 +
      .../clientpositive/spark/smb_mapjoin_15.q.out | 3 +
      .../results/clientpositive/spark/sort.q.out | 2 +-
      .../spark/sort_merge_join_desc_5.q.out | 43 +-
      .../clientpositive/spark/subquery_in.q.out | 8 +-
      .../spark/subquery_multiinsert.q.java1.8.out | 890 +++
      .../clientpositive/spark/transform_ppr1.q.out | 2 +-
      .../clientpositive/spark/transform_ppr2.q.out | 2 +-
      .../results/clientpositive/spark/union12.q.out | 200 +
      .../results/clientpositive/spark/union17.q.out | 855 +++
      .../results/clientpositive/spark/union20.q.out | 200 +
      .../results/clientpositive/spark/union21.q.out | 724 +++
      .../results/clientpositive/spark/union22.q.out | 1602 +++++
      .../results/clientpositive/spark/union24.q.out | 1649 +++++
      .../results/clientpositive/spark/union26.q.out | 1249 ++++
      .../results/clientpositive/spark/union27.q.out | 136 +
      .../results/clientpositive/spark/union3.q.out | 2 +-
      .../results/clientpositive/spark/union31.q.out | 815 +++
      .../results/clientpositive/spark/union32.q.out | 669 ++
      .../results/clientpositive/spark/union34.q.out | 406 ++
      .../clientpositive/spark/union_date.q.out | 142 +
      .../clientpositive/spark/union_date_trim.q.out | 54 +
      .../spark/union_lateralview.q.out | 251 +
      .../clientpositive/spark/union_ppr.q.out | 2 +-
      .../clientpositive/spark/union_remove_12.q.out | 281 +
      .../clientpositive/spark/union_remove_13.q.out | 306 +
      .../clientpositive/spark/union_remove_14.q.out | 283 +
      .../clientpositive/spark/union_remove_22.q.out | 401 ++
      .../clientpositive/spark/union_remove_23.q.out | 265 +
      .../spark/union_remove_6_subq.q.out | 1181 ++++
      .../clientpositive/spark/union_script.q.out | 1524 +++++
      .../clientpositive/spark/union_top_level.q.out | 1012 +++
      .../spark/vector_between_in.q.out | 6088 +-----------------
      .../spark/vector_decimal_mapjoin.q.out | 366 +-
      .../clientpositive/spark/vector_elt.q.out | 2 +-
      .../spark/vectorization_decimal_date.q.out | 20 +-
      .../clientpositive/spark/vectorized_ptf.q.out | 194 +-
      .../results/clientpositive/subquery_in.q.out | 8 +-
      .../subquery_in_explain_rewrite.q.out | 4 +-
      .../clientpositive/subquery_in_having.q.out | 6 +-
      .../results/clientpositive/subquery_notin.q.out | 24 +-
      .../subquery_unqualcolumnrefs.q.out | 16 +-
      .../results/clientpositive/tez/auto_join0.q.out | 1 +
      .../results/clientpositive/tez/auto_join1.q.out | 1 +
      .../clientpositive/tez/auto_join29.q.out | 3 +
      .../clientpositive/tez/auto_join30.q.out | 4 +
      .../tez/auto_sortmerge_join_1.q.out | 3 +
      .../tez/auto_sortmerge_join_10.q.out | 3 +
      .../tez/auto_sortmerge_join_11.q.out | 4 +
      .../tez/auto_sortmerge_join_12.q.out | 3 +
      .../tez/auto_sortmerge_join_13.q.out | 3 +
      .../tez/auto_sortmerge_join_14.q.out | 2 +
      .../tez/auto_sortmerge_join_15.q.out | 2 +
      .../tez/auto_sortmerge_join_2.q.out | 2 +
      .../tez/auto_sortmerge_join_3.q.out | 3 +
      .../tez/auto_sortmerge_join_4.q.out | 3 +
      .../tez/auto_sortmerge_join_5.q.out | 1 +
      .../tez/auto_sortmerge_join_7.q.out | 3 +
      .../tez/auto_sortmerge_join_8.q.out | 3 +
      .../tez/auto_sortmerge_join_9.q.out | 29 +
      .../tez/bucket_map_join_tez1.q.out | 16 +
      .../tez/bucket_map_join_tez2.q.out | 8 +
      .../results/clientpositive/tez/cbo_join.q.out | 350 +-
      .../results/clientpositive/tez/cbo_limit.q.out | 4 +-
      .../clientpositive/tez/cbo_simple_select.q.out | 10 +
      .../clientpositive/tez/cbo_subq_in.q.out | 38 +-
      .../clientpositive/tez/cbo_subq_not_in.q.out | 32 +-
      .../clientpositive/tez/cbo_udf_udaf.q.out | 34 +-
      .../tez/correlationoptimizer1.q.out | 1 +
      .../tez/cross_product_check_2.q.out | 9 +
      .../tez/dynamic_partition_pruning.q.out | 16 +
      .../tez/dynamic_partition_pruning_2.q.out | 7 +
      .../clientpositive/tez/explainuser_1.q.out | 6 +-
      .../clientpositive/tez/explainuser_2.q.out | 4337 +++++++------
      .../tez/hybridgrace_hashjoin_1.q.out | 1587 +++++
      .../tez/hybridgrace_hashjoin_2.q.out | 1417 ++++
      .../clientpositive/tez/hybridhashjoin.q.out | 1560 -----
      .../clientpositive/tez/limit_pushdown.q.out | 94 +
      .../clientpositive/tez/lvj_mapjoin.q.out | 2 +
      .../clientpositive/tez/mapjoin_decimal.q.out | 10 +-
      .../clientpositive/tez/mapjoin_mapjoin.q.out | 6 +
      .../results/clientpositive/tez/mergejoin.q.out | 2528 ++++++++
      .../test/results/clientpositive/tez/mrr.q.out | 2 +
      .../clientpositive/tez/optimize_nullscan.q.out | 153 +
      .../test/results/clientpositive/tez/ptf.q.out | 160 +-
      .../clientpositive/tez/ptf_streaming.q.out | 84 +-
      .../clientpositive/tez/subquery_in.q.out | 8 +-
      .../tez/tez_bmj_schema_evolution.q.out | 1 +
      .../clientpositive/tez/tez_join_hash.q.out | 2 +
      .../clientpositive/tez/tez_smb_main.q.out | 12 +
      .../results/clientpositive/tez/tez_union.q.out | 14 +
      .../tez/tez_union_multiinsert.q.out | 4293 ++++++++++++
      .../clientpositive/tez/unionDistinct_1.q.out | 10 +
      .../clientpositive/tez/vector_aggregate_9.q.out | 8 +-
      .../clientpositive/tez/vector_between_in.q.out | 6088 +-----------------
      .../tez/vector_binary_join_groupby.q.out | 303 +
      .../tez/vector_char_mapjoin1.q.out | 43 +-
      .../clientpositive/tez/vector_coalesce.q.out | 4 +-
      .../clientpositive/tez/vector_decimal_2.q.out | 4 +
      .../tez/vector_decimal_mapjoin.q.out | 371 +-
      .../clientpositive/tez/vector_decimal_udf.q.out | 3 +-
      .../results/clientpositive/tez/vector_elt.q.out | 2 +-
      .../clientpositive/tez/vector_inner_join.q.out | 806 +++
      .../tez/vector_mapjoin_reduce.q.out | 4 +
      .../tez/vector_mr_diff_schema_alias.q.out | 381 ++
      .../clientpositive/tez/vector_outer_join.q.out | 2204 +++++++
      .../clientpositive/tez/vector_outer_join0.q.out | 232 +
      .../clientpositive/tez/vector_outer_join1.q.out | 541 ++
      .../clientpositive/tez/vector_outer_join2.q.out | 238 +
      .../clientpositive/tez/vector_outer_join3.q.out | 527 ++
      .../clientpositive/tez/vector_outer_join4.q.out | 864 +++
      .../tez/vector_varchar_mapjoin1.q.out | 5 +-
      .../tez/vectorization_decimal_date.q.out | 20 +-
      .../clientpositive/tez/vectorized_casts.q.out | 52 +-
      .../vectorized_dynamic_partition_pruning.q.out | 16 +
      .../clientpositive/tez/vectorized_mapjoin.q.out | 1 +
      .../tez/vectorized_nested_mapjoin.q.out | 2 +
      .../tez/vectorized_parquet_types.q.out | 347 +
      .../clientpositive/tez/vectorized_ptf.q.out | 160 +-
      .../clientpositive/timestamp_ints_casts.q.out | 264 +
      ql/src/test/results/clientpositive/udf4.q.out | 2 +-
      ql/src/test/results/clientpositive/udf6.q.out | 30 +-
      ql/src/test/results/clientpositive/udf7.q.out | 2 +-
      .../test/results/clientpositive/udf_case.q.out | 2 +-
      .../results/clientpositive/udf_coalesce.q.out | 4 +-
      .../clientpositive/udf_date_format.q.out | 172 +
      .../test/results/clientpositive/udf_elt.q.out | 4 +-
      .../results/clientpositive/udf_greatest.q.out | 4 +-
      .../test/results/clientpositive/udf_hour.q.out | 2 +-
      ql/src/test/results/clientpositive/udf_if.q.out | 4 +-
      .../test/results/clientpositive/udf_instr.q.out | 4 +-
      .../clientpositive/udf_isnull_isnotnull.q.out | 2 +-
      .../test/results/clientpositive/udf_least.q.out | 4 +-
      .../results/clientpositive/udf_locate.q.out | 4 +-
      .../test/results/clientpositive/udf_md5.q.out | 61 +
      .../results/clientpositive/udf_minute.q.out | 2 +-
      .../test/results/clientpositive/udf_nvl.q.out | 2 +-
      .../results/clientpositive/udf_parse_url.q.out | 2 +-
      .../results/clientpositive/udf_quarter.q.out | 246 +
      .../results/clientpositive/udf_second.q.out | 2 +-
      .../test/results/clientpositive/udf_size.q.out | 2 +-
      .../test/results/clientpositive/udf_trunc.q.out | 64 +-
      .../test/results/clientpositive/udf_when.q.out | 4 +-
      .../test/results/clientpositive/union12.q.out | 8 +-
      .../test/results/clientpositive/union17.q.out | 4 +-
      .../test/results/clientpositive/union20.q.out | 22 +-
      .../test/results/clientpositive/union21.q.out | 6 +-
      .../test/results/clientpositive/union27.q.out | 4 +-
      .../clientpositive/union_remove_22.q.out | 16 +-
      .../clientpositive/union_remove_6_subq.q.out | 12 +-
      .../results/clientpositive/union_view.q.out | 450 +-
      .../clientpositive/vector_aggregate_9.q.out | 8 +-
      .../clientpositive/vector_between_in.q.out | 6088 +-----------------
      .../vector_binary_join_groupby.q.out | 293 +
      .../clientpositive/vector_char_mapjoin1.q.out | 36 +-
      .../clientpositive/vector_coalesce.q.out | 4 +-
      .../clientpositive/vector_decimal_2.q.out | 4 +
      .../clientpositive/vector_decimal_mapjoin.q.out | 370 +-
      .../clientpositive/vector_decimal_udf.q.out | 3 +-
      .../results/clientpositive/vector_elt.q.out | 2 +-
      .../clientpositive/vector_inner_join.q.out | 799 +++
      .../vector_mr_diff_schema_alias.q.out | 396 ++
      .../clientpositive/vector_outer_join0.q.out | 230 +
      .../clientpositive/vector_outer_join1.q.out | 534 ++
      .../clientpositive/vector_outer_join2.q.out | 232 +
      .../clientpositive/vector_outer_join3.q.out | 509 ++
      .../clientpositive/vector_outer_join4.q.out | 857 +++
      .../vector_varchar_mapjoin1.q.out | 4 +-
      .../vectorization_decimal_date.q.out | 20 +-
      .../clientpositive/vectorized_casts.q.out | 52 +-
      .../results/clientpositive/vectorized_ptf.q.out | 167 +-
      .../clientpositive/vectorized_shufflejoin.q.out | 1 -
      .../vectorized_timestamp_ints_casts.q.out | 266 +
      .../clientpositive/windowing_navfn.q.out | 10 +
      .../clientpositive/windowing_streaming.q.out | 16 +-
      .../clientpositive/windowing_windowspec.q.out | 108 +
      ql/src/test/templates/TestCliDriver.vm | 18 +-
      ql/src/test/templates/TestCompareCliDriver.vm | 19 +-
      ql/src/test/templates/TestNegativeCliDriver.vm | 18 +-
      ql/src/test/templates/TestParseNegative.vm | 17 +-
      serde/pom.xml | 2 +-
      .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 19 +-
      .../apache/hadoop/hive/serde2/SerDeUtils.java | 1 +
      .../apache/hadoop/hive/serde2/WriteBuffers.java | 25 +-
      .../binarysortable/BinarySortableSerDe.java | 157 +-
      .../fast/BinarySortableSerializeWrite.java | 102 +-
      .../hadoop/hive/serde2/fast/SerializeWrite.java | 12 +-
      .../hive/serde2/io/TimestampWritable.java | 11 +-
      .../lazy/fast/LazySimpleSerializeWrite.java | 13 +-
      .../LazyObjectInspectorFactory.java | 24 +-
      .../LazyPrimitiveObjectInspectorFactory.java | 41 +-
      .../hive/serde2/lazybinary/LazyBinaryUtils.java | 19 +-
      .../fast/LazyBinarySerializeWrite.java | 15 +-
      .../LazyBinaryObjectInspectorFactory.java | 24 +-
      .../MetadataListStructObjectInspector.java | 16 +-
      .../PrimitiveObjectInspectorConverter.java | 7 +-
      .../PrimitiveObjectInspectorFactory.java | 22 +-
      .../PrimitiveObjectInspectorUtils.java | 42 +-
      .../hive/serde2/typeinfo/TypeInfoFactory.java | 31 +-
      .../hive/serde2/typeinfo/TypeInfoUtils.java | 27 +-
      .../hive/serde2/binarysortable/MyTestClass.java | 105 +
      .../binarysortable/MyTestPrimitiveClass.java | 20 +
      .../binarysortable/TestBinarySortableFast.java | 13 +-
      .../binarysortable/TestBinarySortableSerDe.java | 10 +-
      .../hive/serde2/lazy/TestLazySimpleFast.java | 22 +-
      .../TestPrimitiveObjectInspectorUtils.java | 95 +-
      service/pom.xml | 7 +-
      .../org/apache/hive/service/CookieSigner.java | 90 +
      .../apache/hive/service/auth/HttpAuthUtils.java | 72 +
      .../auth/LdapAuthenticationProviderImpl.java | 2 +-
      .../service/cli/session/HiveSessionImpl.java | 14 +-
      .../service/cli/session/SessionManager.java | 49 +-
      .../thrift/EmbeddedThriftBinaryCLIService.java | 5 +
      .../cli/thrift/ThriftBinaryCLIService.java | 1 +
      .../service/cli/thrift/ThriftCLIService.java | 79 +-
      .../service/cli/thrift/ThriftHttpServlet.java | 210 +-
      .../apache/hive/service/server/HiveServer2.java | 138 +-
      .../apache/hive/service/TestCookieSigner.java | 59 +
      .../service/cli/session/TestSessionHooks.java | 3 +-
      .../thrift/ThriftCliServiceTestWithCookie.java | 221 +
      shims/0.20S/pom.xml | 2 +-
      .../hadoop/hive/shims/Hadoop20SShims.java | 30 +
      shims/0.23/pom.xml | 2 +-
      .../apache/hadoop/hive/shims/Hadoop23Shims.java | 66 +-
      shims/aggregator/pom.xml | 2 +-
      shims/common/pom.xml | 2 +-
      .../org/apache/hadoop/fs/DefaultFileAccess.java | 65 +-
      .../apache/hadoop/hive/shims/HadoopShims.java | 44 +-
      .../hadoop/hive/shims/HadoopShimsSecure.java | 12 +
      shims/pom.xml | 2 +-
      shims/scheduler/pom.xml | 2 +-
      spark-client/pom.xml | 4 +-
      .../apache/hive/spark/client/JobContext.java | 6 +
      .../hive/spark/client/JobContextImpl.java | 10 +-
      .../apache/hive/spark/client/RemoteDriver.java | 34 +-
      .../hive/spark/client/SparkClientImpl.java | 34 +-
      .../hive/spark/client/SparkClientUtilities.java | 23 +-
      .../apache/hive/spark/client/rpc/RpcServer.java | 11 +-
      .../apache/hive/spark/client/rpc/TestRpc.java | 32 +-
      testutils/metastore/execute-test-on-lxc.sh | 17 +-
      testutils/metastore/metastore-upgrade-test.sh | 5 +-
      testutils/pom.xml | 2 +-
      .../ptest2/src/main/resources/source-prep.vm | 11 +
      .../hive/ptest/execution/TestScripts.java | 23 +
      .../TestScripts.testPrepGit.approved.txt | 2 +-
      .../TestScripts.testPrepHadoop1.approved.txt | 111 +
      .../TestScripts.testPrepNone.approved.txt | 2 +-
      .../TestScripts.testPrepSvn.approved.txt | 2 +-
      1007 files changed, 102334 insertions(+), 31470 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ant/src/org/apache/hadoop/hive/ant/QTestGenTask.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/cli/pom.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
    ----------------------------------------------------------------------
    diff --cc itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
    index 32b971c,adb8a71..c7bbead
    --- a/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
    +++ b/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
    @@@ -166,11 -171,24 +171,25 @@@ public class MiniHS2 extends AbstractHi
           baseDir = Files.createTempDir();
           localFS = FileSystem.getLocal(hiveConf);
           FileSystem fs;
    - if (useMiniMR) {
    +
    + if (miniClusterType != MiniClusterType.DFS_ONLY) {
    + // Initialize dfs
             dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, 4, true, null);
             fs = dfs.getFileSystem();
    - mr = ShimLoader.getHadoopShims().getMiniMrCluster(hiveConf, 4,
    - fs.getUri().toString(), 1);
    + String uriString = WindowsPathUtil.getHdfsUriString(fs.getUri().toString());
    +
    + // Initialize the execution engine based on cluster type
    + switch (miniClusterType) {
    + case TEZ:
      - mr = ShimLoader.getHadoopShims().getMiniTezCluster(hiveConf, 4, uriString, 1);
    ++ mr = ShimLoader.getHadoopShims().getMiniTezCluster(hiveConf, 4, uriString, 1, false,
    ++ baseDir.toString() + "/staging");
    + break;
    + case MR:
    + mr = ShimLoader.getHadoopShims().getMiniMrCluster(hiveConf, 4, uriString, 1);
    + break;
    + default:
     + throw new IllegalArgumentException("Unsupported cluster type " + miniClusterType);
    + }
             // store the config in system properties
             mr.setupConfiguration(getHiveConf());
             baseDfsDir = new Path(new Path(fs.getUri()), "/base");
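
     The merged hunk above replaces the old boolean useMiniMR check with a switch over the
     mini-cluster type, so only non-DFS_ONLY types start an execution engine on top of the
     mini DFS. A minimal self-contained sketch of that dispatch follows; the enum and
     launcher names are hypothetical stand-ins, not the actual MiniHS2 API:

        /** Hypothetical sketch of type-driven cluster startup; not Hive's real API. */
        enum ClusterType { DFS_ONLY, MR, TEZ }

        final class MiniClusterLauncher {
          String launch(ClusterType type, String dfsUri, String stagingDir) {
            switch (type) {
              case TEZ:
                // the staging directory argument is what this merge adds for Tez
                return "tez://" + dfsUri + " staging=" + stagingDir;
              case MR:
                return "mr://" + dfsUri;
              case DFS_ONLY:
                return null; // no execution engine on top of the DFS
              default:
                // report the offending type itself, not an unrelated field
                throw new IllegalArgumentException("Unsupported cluster type " + type);
            }
          }
        }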

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/itests/qtest/pom.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --cc itests/src/test/resources/testconfiguration.properties
    index e7d5160,8e9984a..4ddaf51
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@@ -299,10 -302,10 +302,11 @@@ minitez.query.files=bucket_map_join_tez
         dynamic_partition_pruning_2.q,\
         explainuser_1.q,\
         explainuser_2.q,\
    - hybridhashjoin.q,\
    + hybridgrace_hashjoin_1.q,\
    + hybridgrace_hashjoin_2.q,\
         mapjoin_decimal.q,\
      - lvj_mapjoin.q, \
      + lvj_mapjoin.q,\
      + llapdecider.q,\
         mrr.q,\
         tez_bmj_schema_evolution.q,\
         tez_dml.q,\
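
     For readers tracing how this list is consumed: testconfiguration.properties is an
     ordinary java.util.Properties file, so the trailing backslashes above are line
     continuations and the value loads as one comma-separated string. A small illustrative
     sketch (the property content is abbreviated; the parsing code is not from this commit):

        import java.io.StringReader;
        import java.util.Arrays;
        import java.util.List;
        import java.util.Properties;

        public class QFileListDemo {
          public static void main(String[] args) throws Exception {
            // Backslash-newline is a Properties continuation, as in the hunk above.
            Properties props = new Properties();
            props.load(new StringReader(
                "minitez.query.files=explainuser_2.q,\\\nhybridgrace_hashjoin_1.q,\\\nmrr.q"));
            List<String> qfiles =
                Arrays.asList(props.getProperty("minitez.query.files").split(","));
            System.out.println(qfiles); // [explainuser_2.q, hybridgrace_hashjoin_1.q, mrr.q]
          }
        }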

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/packaging/pom.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/packaging/src/main/assembly/bin.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/pom.xml
    ----------------------------------------------------------------------
    diff --cc pom.xml
    index 8509413,1921b06..d4eb8e5
    --- a/pom.xml
    +++ b/pom.xml
    @@@ -156,9 -154,9 +156,9 @@@
           <stax.version>1.0.1</stax.version>
           <slf4j.version>1.7.5</slf4j.version>
           <ST4.version>4.0.4</ST4.version>
      - <tez.version>0.5.2</tez.version>
      + <tez.version>0.7.0-TEZ-2003-SNAPSHOT</tez.version>
           <super-csv.version>2.2.0</super-csv.version>
    - <spark.version>1.2.0</spark.version>
    + <spark.version>1.3.0</spark.version>
           <scala.binary.version>2.10</scala.binary.version>
           <scala.version>2.10.4</scala.version>
           <tempus-fugit.version>1.1</tempus-fugit.version>

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/pom.xml
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    index 1a836bd,b1352f3..47d3c6b
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
    @@@ -454,12 -509,31 +520,32 @@@ public class MapJoinOperator extends Ab
                 }
               }
             }
    +
    + if (isLogInfoEnabled) {
    + LOG.info("spilled: " + spilled + " abort: " + abort + ". Clearing spilled partitions.");
    + }
    +
     + // spilled tables are always loaded (never shared), so clear them
    + clearAllTableContainers();
    + cache.remove(cacheKey);
           }
    +
       // in the MapReduce case we must always clean up, since MapReduce has no object registry.
           if ((this.getExecContext() != null) && (this.getExecContext().getLocalWork() != null)
    - && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())
    - && mapJoinTables != null) {
    + && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())) {
    + if (isLogInfoEnabled) {
    + LOG.info("MR: Clearing all map join table containers.");
    + }
    + clearAllTableContainers();
    + }
    +
    ++ mapJoinTables = null;
    + this.loader = null;
    + super.closeOp(abort);
    + }
    +
    + private void clearAllTableContainers() {
    + if (mapJoinTables != null) {
             for (MapJoinTableContainer tableContainer : mapJoinTables) {
               if (tableContainer != null) {
                 tableContainer.clear();
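
     The closeOp changes above funnel all teardown through one null-guarded helper and then
     drop the table reference so the per-vertex cache cannot hand back a cleared container.
     A minimal sketch of that idiom, with hypothetical types in place of the real Hive
     containers:

        /** Hypothetical stand-in for MapJoinTableContainer; illustration only. */
        interface Container { void clear(); }

        final class TableCleanup {
          private Container[] tables;

          void clearAll() {
            if (tables == null) {
              return; // nothing loaded, or already cleared
            }
            for (Container t : tables) {
              if (t != null) {
                t.clear(); // release hash-table memory and any spill files
              }
            }
            tables = null; // drop the reference so nothing reuses cleared tables
          }
        }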

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    index e349fac,ad5c8f8..fe1c536
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    @@@ -3824,37 -3848,27 +3849,62 @@@ public final class Utilities
         }

         /**
    + * Checks if the current HiveServer2 logging operation level is >= PERFORMANCE.
    + * @param conf Hive configuration.
     + * @return true if the current HiveServer2 logging operation level is
     + * PERFORMANCE or VERBOSE; false otherwise.
    + */
    + public static boolean isPerfOrAboveLogging(HiveConf conf) {
    + String loggingLevel = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL);
    + return conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED) &&
    + (loggingLevel.equalsIgnoreCase("PERFORMANCE") || loggingLevel.equalsIgnoreCase("VERBOSE"));
    + }
    +
    + /**
    + * Strips Hive password details from configuration
    + */
    + public static void stripHivePasswordDetails(Configuration conf) {
    + // Strip out all Hive related password information from the JobConf
    + if (HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD) != null) {
    + HiveConf.setVar(conf, HiveConf.ConfVars.METASTOREPWD, "");
    + }
    + if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD) != null) {
    + HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD, "");
    + }
    + }
    ++
    ++ /**
      + * Returns the full path to the Jar containing the class. It always returns a JAR.
      + *
      + * @param klass
      + * class.
      + *
      + * @return path to the Jar containing the class.
      + */
      + @SuppressWarnings("rawtypes")
      + public static String jarFinderGetJar(Class klass) {
      + Preconditions.checkNotNull(klass, "klass");
      + ClassLoader loader = klass.getClassLoader();
      + if (loader != null) {
      + String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
      + try {
      + for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
      + URL url = (URL) itr.nextElement();
      + String path = url.getPath();
      + if (path.startsWith("file:")) {
      + path = path.substring("file:".length());
      + }
      + path = URLDecoder.decode(path, "UTF-8");
      + if ("jar".equals(url.getProtocol())) {
      + path = URLDecoder.decode(path, "UTF-8");
      + return path.replaceAll("!.*$", "");
      + }
      + }
      + } catch (IOException e) {
      + throw new RuntimeException(e);
      + }
      + }
      + return null;
      + }
      +
       }
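
    The jarFinderGetJar helper above locates a class's jar by scanning the classloader's resources for the .class file and, when the resource is a jar: URL, stripping the "!..." suffix. A hedged standalone sketch of the same lookup (JarLocator is an invented name; it returns null for classes loaded from a plain directory or the bootstrap loader):

      import java.io.IOException;
      import java.net.URL;
      import java.net.URLDecoder;
      import java.util.Enumeration;

      public class JarLocator {
        public static String findJar(Class<?> klass) {
          ClassLoader loader = klass.getClassLoader();
          if (loader == null) {
            return null; // bootstrap classes expose no loader
          }
          String classFile = klass.getName().replace('.', '/') + ".class";
          try {
            for (Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
              URL url = itr.nextElement();
              if ("jar".equals(url.getProtocol())) {
                // a jar resource URL looks like jar:file:/path/foo.jar!/com/Bar.class
                String path = url.getPath();
                if (path.startsWith("file:")) {
                  path = path.substring("file:".length());
                }
                path = URLDecoder.decode(path, "UTF-8");
                return path.replaceAll("!.*$", "");
              }
            }
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
          return null;
        }

        public static void main(String[] args) {
          // prints the jar containing JarLocator itself, or null when run from classes/
          System.out.println(findJar(JarLocator.class));
        }
      }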

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
    index 716a6b6,3f240f5..78ba442
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
    @@@ -21,19 -21,20 +21,12 @@@ package org.apache.hadoop.hive.ql.exec.
       import java.util.HashMap;
       import java.util.List;
       import java.util.Map;
    -
    - import com.google.common.base.Preconditions;
    + import java.util.Set;

      -import com.google.common.base.Preconditions;
      -
       import org.apache.commons.logging.Log;
       import org.apache.commons.logging.LogFactory;
       import org.apache.hadoop.fs.Path;
       import org.apache.hadoop.hive.common.JavaUtils;
    --import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
    --import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
    --import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
    --import org.apache.hadoop.hive.ql.log.PerfLogger;
    --import org.apache.hadoop.mapred.FileOutputFormat;
    --import org.apache.hadoop.mapred.Partitioner;
       import org.apache.hadoop.hive.conf.HiveConf;
       import org.apache.hadoop.hive.ql.Context;
       import org.apache.hadoop.hive.ql.ErrorMsg;
    @@@ -41,6 -44,6 +36,10 @@@ import org.apache.hadoop.hive.ql.exec.U
       import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
       import org.apache.hadoop.hive.ql.exec.mr.ExecReducer;
       import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
    ++import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
    ++import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
    ++import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
    ++import org.apache.hadoop.hive.ql.log.PerfLogger;
       import org.apache.hadoop.hive.ql.metadata.HiveException;
       import org.apache.hadoop.hive.ql.plan.BaseWork;
       import org.apache.hadoop.hive.ql.plan.MapWork;
    @@@ -51,10 -54,10 +50,14 @@@ import org.apache.hadoop.hive.ql.stats.
       import org.apache.hadoop.hive.ql.stats.StatsPublisher;
       import org.apache.hadoop.io.Writable;
       import org.apache.hadoop.io.WritableComparable;
    ++import org.apache.hadoop.mapred.FileOutputFormat;
       import org.apache.hadoop.mapred.JobConf;
    ++import org.apache.hadoop.mapred.Partitioner;
       import org.apache.spark.api.java.JavaPairRDD;
       import org.apache.spark.api.java.JavaSparkContext;

    ++import com.google.common.base.Preconditions;
    ++
       @SuppressWarnings("rawtypes")
       public class SparkPlanGenerator {
         private static final String CLASS_NAME = SparkPlanGenerator.class.getName();
    @@@ -96,14 -99,20 +99,20 @@@
           workToTranMap.clear();
           workToParentWorkTranMap.clear();

    - for (BaseWork work : sparkWork.getAllWork()) {
    - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    - SparkTran tran = generate(work);
    - SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
    - sparkPlan.addTran(tran);
    - sparkPlan.connect(parentTran, tran);
    - workToTranMap.put(work, tran);
    - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    + try {
    + for (BaseWork work : sparkWork.getAllWork()) {
    + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    + SparkTran tran = generate(work);
    + SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
    + sparkPlan.addTran(tran);
    + sparkPlan.connect(parentTran, tran);
    + workToTranMap.put(work, tran);
    + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    + }
    + } finally {
    + // clear all ThreadLocal cached MapWork/ReduceWork after plan generation
     + // as this may be executed in a pooled thread.
      - Utilities.clearWorkMap();
    ++ Utilities.clearWorkMap(jobConf);
           }

           perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
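
    The try/finally added above guarantees that the ThreadLocal-cached MapWork/ReduceWork is cleared even when a translation fails, which matters because plan generation may run on a reused pool thread. A generic sketch of that pattern, with invented names:

      import java.util.ArrayList;
      import java.util.List;

      class PooledThreadCacheSketch {
        private static final ThreadLocal<List<String>> WORK_CACHE =
            ThreadLocal.withInitial(ArrayList::new);

        void buildPlan(List<String> allWork) {
          try {
            for (String work : allWork) {
              WORK_CACHE.get().add(work); // cached per thread while generating
              // ... generate and wire up the translation for `work` ...
            }
          } finally {
            // pool threads are reused: clear even on failure so the next
            // task scheduled on this thread never sees stale cached work
            WORK_CACHE.remove();
          }
        }
      }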

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
    index 7c939e7,0000000..b4a3236
    mode 100644,000000..100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/LlapObjectCache.java
    @@@ -1,139 -1,0 +1,145 @@@
      +/**
      + * Licensed to the Apache Software Foundation (ASF) under one
      + * or more contributor license agreements. See the NOTICE file
      + * distributed with this work for additional information
      + * regarding copyright ownership. The ASF licenses this file
      + * to you under the Apache License, Version 2.0 (the
      + * "License"); you may not use this file except in compliance
      + * with the License. You may obtain a copy of the License at
      + *
      + * http://www.apache.org/licenses/LICENSE-2.0
      + *
      + * Unless required by applicable law or agreed to in writing, software
      + * distributed under the License is distributed on an "AS IS" BASIS,
      + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      + * See the License for the specific language governing permissions and
      + * limitations under the License.
      + */
      +
      +package org.apache.hadoop.hive.ql.exec.tez;
      +
      +import java.util.HashMap;
      +import java.util.Map;
      +import java.util.concurrent.Callable;
      +import java.util.concurrent.ExecutorService;
      +import java.util.concurrent.Executors;
      +import java.util.concurrent.Future;
      +
      +import java.util.concurrent.locks.ReentrantLock;
      +
      +import org.apache.commons.logging.Log;
      +import org.apache.commons.logging.LogFactory;
      +import org.apache.hadoop.hive.ql.metadata.HiveException;
      +
      +import com.google.common.cache.Cache;
      +import com.google.common.cache.CacheBuilder;
      +
      +/**
      + * LlapObjectCache. Llap implementation for the shared object cache.
      + *
      + */
      +public class LlapObjectCache implements org.apache.hadoop.hive.ql.exec.ObjectCache {
      +
      + private static final Log LOG = LogFactory.getLog(LlapObjectCache.class.getName());
      +
      + private static final Cache<String, Object> registry
      + = CacheBuilder.newBuilder().softValues().build();
      +
      + private static final Map<String, ReentrantLock> locks
      + = new HashMap<String, ReentrantLock>();
      +
      + private static final ReentrantLock lock = new ReentrantLock();
      +
      + private static ExecutorService staticPool = Executors.newCachedThreadPool();
      +
      + private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();
      + private static final boolean isLogInfoEnabled = LOG.isInfoEnabled();
      +
      + public LlapObjectCache() {
      + }
      +
      + @Override
      + public void release(String key) {
      + // nothing to do, soft references will clean themselves up
      + }
      +
      + @Override
      + public <T> T retrieve(String key, Callable<T> fn) throws HiveException {
      +
      + T value = null;
      + ReentrantLock objectLock = null;
      +
      + lock.lock();
      + try {
      + value = (T) registry.getIfPresent(key);
      + if (value != null) {
      + if (isLogInfoEnabled) {
      + LOG.info("Found " + key + " in cache");
      + }
      + return value;
      + }
      +
      + if (locks.containsKey(key)) {
      + objectLock = locks.get(key);
      + } else {
      + objectLock = new ReentrantLock();
      + locks.put(key, objectLock);
      + }
      + } finally {
      + lock.unlock();
      + }
      +
      + objectLock.lock();
      + try {
      + lock.lock();
      + try {
      + value = (T) registry.getIfPresent(key);
      + if (value != null) {
      + if (isLogInfoEnabled) {
      + LOG.info("Found " + key + " in cache");
      + }
      + return value;
      + }
      + } finally {
      + lock.unlock();
      + }
      +
      + try {
      + value = fn.call();
      + } catch (Exception e) {
      + throw new HiveException(e);
      + }
      +
      + lock.lock();
      + try {
      + if (isLogInfoEnabled) {
      + LOG.info("Caching new object for key: " + key);
      + }
      +
      + registry.put(key, value);
      + locks.remove(key);
      + } finally {
      + lock.unlock();
      + }
      + } finally {
      + objectLock.unlock();
      + }
      + return value;
      + }
      +
      + @Override
      + public <T> Future<T> retrieveAsync(final String key, final Callable<T> fn) throws HiveException {
      + return staticPool.submit(new Callable<T>() {
      + @Override
      + public T call() throws Exception {
      + return retrieve(key, fn);
      + }
      + });
      + }
    ++
    ++ @Override
    ++ public void remove(String key) {
    ++ LOG.info("Removing key: " + key);
    ++ registry.invalidate(key);
    ++ }
      +}
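
    retrieve() above is a per-key double-checked load: a global lock guards the registry and the map of per-key locks, the per-key lock ensures the expensive Callable runs at most once, and the second lookup under the per-key lock picks up a value loaded by a racing thread. A condensed, generic sketch of the idiom (PerKeyLoadingCache is an invented name, not the Hive class):

      import java.util.HashMap;
      import java.util.Map;
      import java.util.concurrent.Callable;
      import java.util.concurrent.locks.ReentrantLock;

      class PerKeyLoadingCache<V> {
        private final Map<String, V> values = new HashMap<>();
        private final Map<String, ReentrantLock> locks = new HashMap<>();
        private final ReentrantLock global = new ReentrantLock();

        V get(String key, Callable<V> loader) throws Exception {
          ReentrantLock keyLock;
          global.lock();
          try {
            V cached = values.get(key);
            if (cached != null) {
              return cached;                 // fast path: already loaded
            }
            keyLock = locks.computeIfAbsent(key, k -> new ReentrantLock());
          } finally {
            global.unlock();
          }
          keyLock.lock();
          try {
            global.lock();
            try {
              V cached = values.get(key);
              if (cached != null) {
                return cached;               // loaded by a racing thread
              }
            } finally {
              global.unlock();
            }
            V loaded = loader.call();        // expensive load: at most once per key
            global.lock();
            try {
              values.put(key, loaded);
              locks.remove(key);             // nobody needs the per-key lock now
            } finally {
              global.unlock();
            }
            return loaded;
          } finally {
            keyLock.unlock();
          }
        }
      }

    The real class additionally stores values with Guava soft references so entries can be reclaimed under memory pressure, and the new remove(key) simply invalidates the registry entry.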

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
    index 4908c91,4423cd1..0722b9f
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
    @@@ -44,9 -29,9 +44,10 @@@ import org.apache.hadoop.hive.ql.exec.F
       import org.apache.hadoop.hive.ql.exec.Heartbeater;
       import org.apache.hadoop.hive.ql.exec.MapOperator;
       import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
    + import org.apache.hadoop.hive.ql.exec.Utilities;
       import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
       import org.apache.hadoop.hive.ql.log.PerfLogger;
      +import org.apache.hadoop.hive.ql.plan.BaseWork;
       import org.apache.hadoop.hive.ql.session.SessionState;
       import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
       import org.apache.tez.common.counters.TaskCounter;

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    index 1ffb6e9,61ee8b9..530f388
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    @@@ -32,8 -32,7 +32,8 @@@ import org.apache.hadoop.conf.Configura
       import org.apache.hadoop.fs.FileSystem;
       import org.apache.hadoop.fs.Path;
       import org.apache.hadoop.hive.conf.HiveConf;
    - import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
    + import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
      +import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion;
       import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

       /**

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    index 03af687,a5a5943..97d09d4
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    @@@ -43,11 -42,11 +42,11 @@@ import org.apache.hadoop.hive.common.Di
       import org.apache.hadoop.hive.common.DiskRangeList.DiskRangeListCreateHelper;
       import org.apache.hadoop.hive.common.type.HiveDecimal;
       import org.apache.hadoop.hive.conf.HiveConf;
      +import org.apache.hadoop.hive.llap.io.api.cache.LlapMemoryBuffer;
       import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
       import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    - import org.apache.hadoop.hive.ql.io.filters.BloomFilter;
    + import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
       import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool;
      -import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReader;
       import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
       import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
       import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
    @@@ -57,12 -56,12 +56,11 @@@ import org.apache.hadoop.hive.serde2.io
       import org.apache.hadoop.hive.shims.HadoopShims.ZeroCopyReaderShim;
       import org.apache.hadoop.io.Text;

      -class RecordReaderImpl implements RecordReader {
      -
      +public class RecordReaderImpl implements RecordReader {
         static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
    - private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
         private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();
      -
         private final Path path;
      + private final FileSystem fileSystem;
         private final FSDataInputStream file;
         private final long firstRow;
         private final List<StripeInformation> stripes =

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderUtils.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    ----------------------------------------------------------------------

    http://git-wip-us.apache.org/repos/asf/hive/blob/b8acbb6e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    ----------------------------------------------------------------------
  • Prasanthj at May 7, 2015 at 1:21 am
    HIVE-10423 - HIVE-7948 breaks deploy_e2e_artifacts.sh (Aswathy Chellammal Sreekumar via Eugene Koifman)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce736af2
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce736af2
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce736af2

    Branch: refs/heads/llap
    Commit: ce736af2a5025a4bb07b39362b064bd64aecdeef
    Parents: c42666e
    Author: Eugene Koifman <ekoifman@hortonworks.com>
    Authored: Mon May 4 18:58:41 2015 -0700
    Committer: Eugene Koifman <ekoifman@hortonworks.com>
    Committed: Mon May 4 18:58:41 2015 -0700

    ----------------------------------------------------------------------
      hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh | 3 +++
      1 file changed, 3 insertions(+)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/ce736af2/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    ----------------------------------------------------------------------
    diff --git a/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh b/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    index f446424..b834075 100755
    --- a/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    +++ b/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    @@ -25,6 +25,9 @@ source ./env.sh

      echo "Deploying artifacts to HDFS..."

    +rm -rf movielens-data
    +mkdir -p movielens-data
    +cd movielens-data
      curl -O http://files.grouplens.org/datasets/movielens/ml-1m.zip
      unzip ml-1m.zip
      mv ml-1m/ratings.dat ${PROJ_HOME}/hcatalog/src/test/e2e/templeton/inpdir/ratings.txt
  • Prasanthj at May 7, 2015 at 1:21 am
    HIVE-10286: SARGs: Type Safety via PredicateLeaf.type (Prasanth Jayachandran reviewed by Gopal V)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/59ecdd38
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/59ecdd38
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/59ecdd38

    Branch: refs/heads/llap
    Commit: 59ecdd38e3cbc0b312834a942092108f14e0672c
    Parents: 6b471da
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Fri May 1 12:05:32 2015 -0700
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Fri May 1 12:05:32 2015 -0700

    ----------------------------------------------------------------------
      .../hive/ql/io/orc/ColumnStatisticsImpl.java | 17 +-
      .../hive/ql/io/orc/DateColumnStatistics.java | 6 +-
      .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 222 +++++++++++--------
      .../hive/ql/io/sarg/SearchArgumentImpl.java | 85 ++-----
      .../hive/ql/io/orc/TestColumnStatistics.java | 20 +-
      .../hive/ql/io/orc/TestRecordReaderImpl.java | 170 ++++++++++++--
      .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 104 +++------
      .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 19 +-
      8 files changed, 367 insertions(+), 276 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    index 9c2c9c2..7cfbd81 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    @@ -17,6 +17,9 @@
       */
      package org.apache.hadoop.hive.ql.io.orc;

    +import java.sql.Date;
    +import java.sql.Timestamp;
    +
      import org.apache.hadoop.hive.common.type.HiveDecimal;
      import org.apache.hadoop.hive.serde2.io.DateWritable;
      import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    @@ -24,8 +27,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
      import org.apache.hadoop.io.BytesWritable;
      import org.apache.hadoop.io.Text;

    -import java.sql.Timestamp;
    -
      class ColumnStatisticsImpl implements ColumnStatistics {

        private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
    @@ -697,15 +698,15 @@ class ColumnStatisticsImpl implements ColumnStatistics {
          private transient final DateWritable maxDate = new DateWritable();

          @Override
    - public DateWritable getMinimum() {
    + public Date getMinimum() {
            minDate.set(minimum);
    - return minDate;
    + return minDate.get();
          }

          @Override
    - public DateWritable getMaximum() {
    + public Date getMaximum() {
            maxDate.set(maximum);
    - return maxDate;
    + return maxDate.get();
          }

          @Override
    @@ -713,9 +714,9 @@ class ColumnStatisticsImpl implements ColumnStatistics {
            StringBuilder buf = new StringBuilder(super.toString());
            if (getNumberOfValues() != 0) {
              buf.append(" min: ");
    - buf.append(minimum);
    + buf.append(getMinimum());
              buf.append(" max: ");
    - buf.append(maximum);
    + buf.append(getMaximum());
            }
            return buf.toString();
          }
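
    The accessors above keep a reusable transient DateWritable and materialize a fresh java.sql.Date on every call via get(), so callers never see or mutate internal state. A minimal model of that pattern (DateStatsHolder is invented; DateWritable stores dates as days since epoch):

      import java.sql.Date;
      import org.apache.hadoop.hive.serde2.io.DateWritable;

      class DateStatsHolder {
        private int minimumDays; // minimum, stored as days since epoch
        private final transient DateWritable minDate = new DateWritable();

        Date getMinimum() {
          minDate.set(minimumDays);  // reuse the scratch holder
          return minDate.get();      // DateWritable.get() builds a new java.sql.Date
        }
      }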

    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
    index 03cdeef..ae3fe31 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java
    @@ -17,7 +17,7 @@
       */
      package org.apache.hadoop.hive.ql.io.orc;

    -import org.apache.hadoop.hive.serde2.io.DateWritable;
    +import java.util.Date;

      /**
       * Statistics for DATE columns.
    @@ -27,11 +27,11 @@ public interface DateColumnStatistics extends ColumnStatistics {
         * Get the minimum value for the column.
         * @return minimum value
         */
    - DateWritable getMinimum();
    + Date getMinimum();

        /**
         * Get the maximum value for the column.
         * @return maximum value
         */
    - DateWritable getMaximum();
    + Date getMaximum();
      }
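
    With this change, callers of the ORC statistics API work with plain Date values instead of the Hadoop DateWritable wrapper. A hedged usage sketch — the reader and the column id are assumptions, and getStatistics() indexing follows ORC's flattened column ids:

      import java.util.Date;
      import org.apache.hadoop.hive.ql.io.orc.ColumnStatistics;
      import org.apache.hadoop.hive.ql.io.orc.DateColumnStatistics;
      import org.apache.hadoop.hive.ql.io.orc.Reader;

      class DateStatsUsage {
        static void printDateRange(Reader reader, int colId) {
          ColumnStatistics[] stats = reader.getStatistics();
          if (stats[colId] instanceof DateColumnStatistics) {
            DateColumnStatistics dateStats = (DateColumnStatistics) stats[colId];
            Date min = dateStats.getMinimum(); // plain Date, no DateWritable
            Date max = dateStats.getMaximum();
            System.out.println("date range: " + min + " .. " + max);
          }
        }
      }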

    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    index 9e7ac4b..a5a5943 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    @@ -30,7 +30,6 @@ import java.util.HashMap;
      import java.util.List;
      import java.util.Map;

    -import org.apache.commons.lang.StringUtils;
      import org.apache.commons.lang3.exception.ExceptionUtils;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    @@ -60,7 +59,6 @@ import org.apache.hadoop.io.Text;
      class RecordReaderImpl implements RecordReader {

        static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
    - private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
        private static final boolean isLogDebugEnabled = LOG.isDebugEnabled();

        private final Path path;
    @@ -280,9 +278,9 @@ class RecordReaderImpl implements RecordReader {
            return ((TimestampColumnStatistics) index).getMaximum();
          } else if (index instanceof BooleanColumnStatistics) {
            if (((BooleanColumnStatistics)index).getTrueCount()!=0) {
    - return "true";
    + return Boolean.TRUE;
            } else {
    - return "false";
    + return Boolean.FALSE;
            }
          } else {
            return null;
    @@ -310,9 +308,9 @@ class RecordReaderImpl implements RecordReader {
            return ((TimestampColumnStatistics) index).getMinimum();
          } else if (index instanceof BooleanColumnStatistics) {
            if (((BooleanColumnStatistics)index).getFalseCount()!=0) {
    - return "false";
    + return Boolean.FALSE;
            } else {
    - return "true";
    + return Boolean.TRUE;
            }
          } else {
            return null;
    @@ -367,18 +365,12 @@ class RecordReaderImpl implements RecordReader {
          }

          TruthValue result;
    - // Predicate object and stats object can be one of the following base types
    - // LONG, DOUBLE, STRING, DATE, DECIMAL
    - // Out of these DATE is not implicitly convertible to other types and rest
    - // others are implicitly convertible. In cases where DATE cannot be converted
    - // the stats object is converted to text and comparison is performed.
    - // When STRINGs are converted to other base types, NumberFormat exception
    - // can occur in which case TruthValue.YES_NO_NULL value is returned
          try {
    - Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC);
    - Object minValue = getConvertedStatsObj(min, baseObj);
    - Object maxValue = getConvertedStatsObj(max, baseObj);
    - Object predObj = getBaseObjectForComparison(baseObj, minValue);
    + // Predicate object and stats objects are converted to the type of the predicate object.
    + Object baseObj = predicate.getLiteral();
    + Object minValue = getBaseObjectForComparison(predicate.getType(), min);
    + Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
    + Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);

            result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
            if (bloomFilter != null && result != TruthValue.NO_NULL && result != TruthValue.NO) {
    @@ -390,7 +382,11 @@ class RecordReaderImpl implements RecordReader {
              LOG.warn("Exception when evaluating predicate. Skipping ORC PPD." +
                  " Exception: " + ExceptionUtils.getStackTrace(e));
            }
    - result = hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
    + if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull) {
    + result = TruthValue.YES_NO;
    + } else {
    + result = TruthValue.YES_NO_NULL;
    + }
          }
          return result;
        }
    @@ -440,8 +436,8 @@ class RecordReaderImpl implements RecordReader {
              if (minValue.equals(maxValue)) {
                // for a single value, look through to see if that value is in the
                // set
    - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
    - predObj = getBaseObjectForComparison(arg, minValue);
    + for (Object arg : predicate.getLiteralList()) {
    + predObj = getBaseObjectForComparison(predicate.getType(), arg);
                  loc = compareToRange((Comparable) predObj, minValue, maxValue);
                  if (loc == Location.MIN) {
                    return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
    @@ -450,8 +446,8 @@ class RecordReaderImpl implements RecordReader {
                return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
              } else {
                // are all of the values outside of the range?
    - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
    - predObj = getBaseObjectForComparison(arg, minValue);
    + for (Object arg : predicate.getLiteralList()) {
    + predObj = getBaseObjectForComparison(predicate.getType(), arg);
                  loc = compareToRange((Comparable) predObj, minValue, maxValue);
                  if (loc == Location.MIN || loc == Location.MIDDLE ||
                      loc == Location.MAX) {
    @@ -461,12 +457,12 @@ class RecordReaderImpl implements RecordReader {
                return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
              }
            case BETWEEN:
    - List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);
    - Object predObj1 = getBaseObjectForComparison(args.get(0), minValue);
    + List<Object> args = predicate.getLiteralList();
    + Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0));

              loc = compareToRange((Comparable) predObj1, minValue, maxValue);
              if (loc == Location.BEFORE || loc == Location.MIN) {
    - Object predObj2 = getBaseObjectForComparison(args.get(1), minValue);
    + Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1));

                Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
                if (loc2 == Location.AFTER || loc2 == Location.MAX) {
    @@ -489,8 +485,8 @@ class RecordReaderImpl implements RecordReader {
          }
        }

    - private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, Object predObj,
    - BloomFilterIO bloomFilter, boolean hasNull) {
    + private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate,
    + final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) {
          switch (predicate.getOperator()) {
            case NULL_SAFE_EQUALS:
              // null safe equals does not return *_NULL variant. So set hasNull to false
    @@ -498,9 +494,10 @@ class RecordReaderImpl implements RecordReader {
            case EQUALS:
              return checkInBloomFilter(bloomFilter, predObj, hasNull);
            case IN:
    - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) {
    + for (Object arg : predicate.getLiteralList()) {
            // if at least one value in the IN list exists in the bloom filter, qualify the row group/stripe
    - TruthValue result = checkInBloomFilter(bloomFilter, arg, hasNull);
    + Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg);
    + TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull);
                if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) {
                  return result;
                }
    @@ -527,14 +524,6 @@ class RecordReaderImpl implements RecordReader {
            if (bf.testString(predObj.toString())) {
              result = TruthValue.YES_NO_NULL;
            }
    - } else if (predObj instanceof Date) {
    - if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
    - result = TruthValue.YES_NO_NULL;
    - }
    - } else if (predObj instanceof DateWritable) {
    - if (bf.testLong(((DateWritable) predObj).getDays())) {
    - result = TruthValue.YES_NO_NULL;
    - }
          } else if (predObj instanceof Timestamp) {
            if (bf.testLong(((Timestamp) predObj).getTime())) {
              result = TruthValue.YES_NO_NULL;
    @@ -543,14 +532,18 @@ class RecordReaderImpl implements RecordReader {
            if (bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime())) {
              result = TruthValue.YES_NO_NULL;
            }
    - } else {
    - // if the predicate object is null and if hasNull says there are no nulls then return NO
    - if (predObj == null && !hasNull) {
    - result = TruthValue.NO;
    - } else {
    + } else if (predObj instanceof Date) {
    + if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
              result = TruthValue.YES_NO_NULL;
            }
    - }
    + } else {
    + // if the predicate object is null and if hasNull says there are no nulls then return NO
    + if (predObj == null && !hasNull) {
    + result = TruthValue.NO;
    + } else {
    + result = TruthValue.YES_NO_NULL;
    + }
    + }

          if (result == TruthValue.YES_NO_NULL && !hasNull) {
            result = TruthValue.YES_NO;
    @@ -563,58 +556,109 @@ class RecordReaderImpl implements RecordReader {
          return result;
        }

    - private static Object getBaseObjectForComparison(Object predObj, Object statsObj) {
    - if (predObj != null) {
    - if (predObj instanceof ExprNodeConstantDesc) {
    - predObj = ((ExprNodeConstantDesc) predObj).getValue();
    + private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) {
    + if (obj != null) {
    + if (obj instanceof ExprNodeConstantDesc) {
    + obj = ((ExprNodeConstantDesc) obj).getValue();
            }
    - // following are implicitly convertible
    - if (statsObj instanceof Long) {
    - if (predObj instanceof Double) {
    - return ((Double) predObj).longValue();
    - } else if (predObj instanceof HiveDecimal) {
    - return ((HiveDecimal) predObj).longValue();
    - } else if (predObj instanceof String) {
    - return Long.valueOf(predObj.toString());
    + } else {
    + return null;
    + }
    + switch (type) {
    + case BOOLEAN:
    + if (obj instanceof Boolean) {
    + return obj;
    + } else {
     + // will only be true if the string conversion yields "true"; all other
     + // values are considered false
    + return Boolean.valueOf(obj.toString());
              }
    - } else if (statsObj instanceof Double) {
    - if (predObj instanceof Long) {
    - return ((Long) predObj).doubleValue();
    - } else if (predObj instanceof HiveDecimal) {
    - return ((HiveDecimal) predObj).doubleValue();
    - } else if (predObj instanceof String) {
    - return Double.valueOf(predObj.toString());
    + case DATE:
    + if (obj instanceof Date) {
    + return obj;
    + } else if (obj instanceof String) {
    + return Date.valueOf((String) obj);
    + } else if (obj instanceof Timestamp) {
    + return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L);
              }
    - } else if (statsObj instanceof String) {
    - return predObj.toString();
    - } else if (statsObj instanceof HiveDecimal) {
    - if (predObj instanceof Long) {
    - return HiveDecimal.create(((Long) predObj));
    - } else if (predObj instanceof Double) {
    - return HiveDecimal.create(predObj.toString());
    - } else if (predObj instanceof String) {
    - return HiveDecimal.create(predObj.toString());
    - } else if (predObj instanceof BigDecimal) {
    - return HiveDecimal.create((BigDecimal)predObj);
    + // always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?)
    + break;
    + case DECIMAL:
    + if (obj instanceof Boolean) {
    + return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO;
    + } else if (obj instanceof Integer) {
    + return HiveDecimal.create(((Integer) obj).intValue());
    + } else if (obj instanceof Long) {
    + return HiveDecimal.create(((Long) obj));
    + } else if (obj instanceof Float || obj instanceof Double ||
    + obj instanceof String) {
    + return HiveDecimal.create(obj.toString());
    + } else if (obj instanceof BigDecimal) {
    + return HiveDecimal.create((BigDecimal) obj);
    + } else if (obj instanceof HiveDecimal) {
    + return obj;
    + } else if (obj instanceof Timestamp) {
    + return HiveDecimal.create(
    + new Double(new TimestampWritable((Timestamp) obj).getDouble()).toString());
              }
    - }
    - }
    - return predObj;
    - }
    -
    - private static Object getConvertedStatsObj(Object statsObj, Object predObj) {
    -
    - // converting between date and other types is not implicit, so convert to
    - // text for comparison
    - if (((predObj instanceof DateWritable) && !(statsObj instanceof DateWritable))
    - || ((statsObj instanceof DateWritable) && !(predObj instanceof DateWritable))) {
    - return StringUtils.stripEnd(statsObj.toString(), null);
    + break;
    + case FLOAT:
    + if (obj instanceof Number) {
    + // widening conversion
    + return ((Number) obj).doubleValue();
    + } else if (obj instanceof HiveDecimal) {
    + return ((HiveDecimal) obj).doubleValue();
    + } else if (obj instanceof String) {
    + return Double.valueOf(obj.toString());
    + } else if (obj instanceof Timestamp) {
    + return new TimestampWritable((Timestamp)obj).getDouble();
    + } else if (obj instanceof BigDecimal) {
    + return ((BigDecimal) obj).doubleValue();
    + }
    + break;
    + case INTEGER:
    + // fall through
    + case LONG:
    + if (obj instanceof Number) {
    + // widening conversion
    + return ((Number) obj).longValue();
    + } else if (obj instanceof HiveDecimal) {
    + return ((HiveDecimal) obj).longValue();
    + } else if (obj instanceof String) {
    + return Long.valueOf(obj.toString());
    + }
    + break;
    + case STRING:
    + if (obj != null) {
    + return (obj.toString());
    + }
    + break;
    + case TIMESTAMP:
    + if (obj instanceof Timestamp) {
    + return obj;
    + } else if (obj instanceof Float) {
    + return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue());
    + } else if (obj instanceof Double) {
    + return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue());
    + } else if (obj instanceof HiveDecimal) {
    + return TimestampWritable.decimalToTimestamp((HiveDecimal) obj);
    + } else if (obj instanceof Date) {
    + return new Timestamp(((Date) obj).getTime());
    + }
    + // float/double conversion to timestamp is interpreted as seconds whereas integer conversion
    + // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting
    + // is also config driven. The filter operator changes its promotion based on config:
    + // "int.timestamp.conversion.in.seconds". Disable PPD for integer cases.
    + break;
    + default:
    + break;
          }

    - if (statsObj instanceof String) {
    - return StringUtils.stripEnd(statsObj.toString(), null);
    - }
    - return statsObj;
    + throw new IllegalArgumentException(String.format(
    + "ORC SARGS could not convert from %s to %s", obj == null ? "(null)" : obj.getClass()
    + .getSimpleName(), type));
        }

        public static class SargApplier {
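
    The rewritten getBaseObjectForComparison converts both the stats min/max and the literal to the predicate's declared PredicateLeaf.Type before comparing, and throws on ambiguous conversions (for example, a bare number into DATE); the caller catches this and falls back to YES_NO / YES_NO_NULL, effectively skipping PPD for that leaf. A trimmed-down, hypothetical model of three of those rules (SargConversionSketch is not the Hive class):

      import java.sql.Date;

      class SargConversionSketch {
        enum Type { LONG, STRING, DATE }

        // Widen numbers for LONG, stringify for STRING, and reject
        // ambiguous numeric DATE literals.
        static Object convert(Type type, Object obj) {
          if (obj == null) {
            return null;
          }
          switch (type) {
            case LONG:
              if (obj instanceof Number) {
                return ((Number) obj).longValue();       // widening conversion
              } else if (obj instanceof String) {
                return Long.valueOf((String) obj);
              }
              break;
            case STRING:
              return obj.toString();
            case DATE:
              if (obj instanceof Date) {
                return obj;
              } else if (obj instanceof String) {
                return Date.valueOf((String) obj);       // expects yyyy-mm-dd
              }
              break; // bare numbers are ambiguous: days? seconds? millis?
          }
          throw new IllegalArgumentException("cannot convert "
              + obj.getClass().getSimpleName() + " to " + type);
        }

        public static void main(String[] args) {
          System.out.println(convert(Type.LONG, "15"));    // 15
          System.out.println(convert(Type.STRING, 15L));   // "15"
          try {
            convert(Type.DATE, 15L);                       // rejected
          } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage());            // PPD would be skipped
          }
        }
      }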

    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    index a451bfb..efe03ab 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    @@ -18,9 +18,15 @@

      package org.apache.hadoop.hive.ql.io.sarg;

    -import com.esotericsoftware.kryo.Kryo;
    -import com.esotericsoftware.kryo.io.Input;
    -import com.esotericsoftware.kryo.io.Output;
    +import java.math.BigDecimal;
    +import java.sql.Timestamp;
    +import java.util.ArrayDeque;
    +import java.util.ArrayList;
    +import java.util.Collections;
    +import java.util.Deque;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;

      import org.apache.commons.codec.binary.Base64;
      import org.apache.commons.lang.StringUtils;
    @@ -54,15 +60,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
      import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
      import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

    -import java.math.BigDecimal;
    -import java.sql.Timestamp;
    -import java.util.ArrayDeque;
    -import java.util.ArrayList;
    -import java.util.Collections;
    -import java.util.Deque;
    -import java.util.HashMap;
    -import java.util.List;
    -import java.util.Map;
    +import com.esotericsoftware.kryo.Kryo;
    +import com.esotericsoftware.kryo.io.Input;
    +import com.esotericsoftware.kryo.io.Output;

      import parquet.filter2.predicate.FilterApi;
      import parquet.filter2.predicate.FilterPredicate;
    @@ -116,55 +116,12 @@ final class SearchArgumentImpl implements SearchArgument {
          }

          @Override
    - public Object getLiteral(FileFormat format) {
    - // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
    - // (https://github.com/EsotericSoftware/kryo/issues/88)
    - // When we see a Date, convert back into Timestamp
    - if (literal instanceof java.util.Date) {
    - return new Timestamp(((java.util.Date) literal).getTime());
    - }
    -
    - switch (format) {
    - case ORC:
    - // adapt base type to what orc needs
    - if (literal instanceof Integer) {
    - return ((Number) literal).longValue();
    - }
    - return literal;
    - case PARQUET:
    - return literal;
    - default:
    - throw new RuntimeException(
    - "File format " + format + "is not support to build search arguments");
    - }
    + public Object getLiteral() {
    + return literal;
          }

          @Override
    - public List<Object> getLiteralList(FileFormat format) {
    - switch (format) {
    - case ORC:
    - return getOrcLiteralList();
    - case PARQUET:
    - return getParquetLiteralList();
    - default:
    - throw new RuntimeException("File format is not support to build search arguments");
    - }
    - }
    -
    - private List<Object> getOrcLiteralList() {
    - // no need to cast
    - if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof
    - Integer)) {
    - return literalList;
    - }
    - List<Object> result = new ArrayList<Object>(literalList.size());
    - for (Object o : literalList) {
    - result.add(((Number) o).longValue());
    - }
    - return result;
    - }
    -
    - private List<Object> getParquetLiteralList() {
    + public List<Object> getLiteralList() {
            return literalList;
          }

    @@ -350,13 +307,17 @@ final class SearchArgumentImpl implements SearchArgument {
            try {
              builder = leafFilterFactory
                .getLeafFilterBuilderByType(leaf.getType());
    - if (builder == null) return null;
    + if (builder == null) {
    + return null;
    + }
              if (isMultiLiteralsOperator(leaf.getOperator())) {
    - return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList(
    - PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName());
    + return builder.buildPredicate(leaf.getOperator(),
    + leaf.getLiteralList(),
    + leaf.getColumnName());
              } else {
                return builder
    - .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET),
    + .buildPredict(leaf.getOperator(),
    + leaf.getLiteral(),
                    leaf.getColumnName());
              }
            } catch (Exception e) {
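
    After this simplification, consumers no longer select literals by FileFormat: each leaf exposes one untyped literal (or literal list) plus its declared type, and each reader performs its own conversion. A hedged sketch of walking a SearchArgument under the revised interface, using only the accessors shown in the diff above:

      import java.util.List;
      import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
      import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;

      class SargConsumerSketch {
        static void describe(SearchArgument sarg) {
          for (PredicateLeaf leaf : sarg.getLeaves()) {
            PredicateLeaf.Type type = leaf.getType();   // drives any conversion
            Object literal = leaf.getLiteral();         // no FileFormat argument
            List<Object> literals = leaf.getLiteralList();
            System.out.println(leaf.getColumnName() + " " + leaf.getOperator()
                + " type=" + type + " literal=" + literal + " list=" + literals);
          }
        }
      }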

    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    index 5c7fe60..4d30377 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
    @@ -20,6 +20,12 @@ package org.apache.hadoop.hive.ql.io.orc;

      import static junit.framework.Assert.assertEquals;

    +import java.io.File;
    +import java.io.FileOutputStream;
    +import java.io.PrintStream;
    +import java.sql.Timestamp;
    +import java.util.List;
    +
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
    @@ -35,12 +41,6 @@ import org.junit.Rule;
      import org.junit.Test;
      import org.junit.rules.TestName;

    -import java.io.File;
    -import java.io.FileOutputStream;
    -import java.io.PrintStream;
    -import java.sql.Timestamp;
    -import java.util.List;
    -
      /**
       * Test ColumnStatisticsImpl for ORC.
       */
    @@ -130,14 +130,14 @@ public class TestColumnStatistics {
          stats2.updateDate(new DateWritable(2000));
          stats1.merge(stats2);
          DateColumnStatistics typed = (DateColumnStatistics) stats1;
    - assertEquals(new DateWritable(10), typed.getMinimum());
    - assertEquals(new DateWritable(2000), typed.getMaximum());
    + assertEquals(new DateWritable(10).get(), typed.getMinimum());
    + assertEquals(new DateWritable(2000).get(), typed.getMaximum());
          stats1.reset();
          stats1.updateDate(new DateWritable(-10));
          stats1.updateDate(new DateWritable(10000));
          stats1.merge(stats2);
    - assertEquals(-10, typed.getMinimum().getDays());
    - assertEquals(10000, typed.getMaximum().getDays());
    + assertEquals(new DateWritable(-10).get(), typed.getMinimum());
    + assertEquals(new DateWritable(10000).get(), typed.getMaximum());
        }

        @Test

    http://git-wip-us.apache.org/repos/asf/hive/blob/59ecdd38/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    index 78d779c..957f54e 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java
    @@ -219,7 +219,8 @@ public class TestRecordReaderImpl {

        @Test
        public void testGetMin() throws Exception {
    - assertEquals(10L, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
    + assertEquals(10L, RecordReaderImpl.getMin(
    + ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
          assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
              OrcProto.ColumnStatistics.newBuilder()
                  .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
    @@ -252,6 +253,13 @@ public class TestRecordReaderImpl {
              .setIntStatistics(intStats.build()).build();
        }

    + private static OrcProto.ColumnStatistics createBooleanStats(int n, int trueCount) {
    + OrcProto.BucketStatistics.Builder boolStats = OrcProto.BucketStatistics.newBuilder();
    + boolStats.addCount(trueCount);
    + return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(n).setBucketStatistics(
    + boolStats.build()).build();
    + }
    +
        private static OrcProto.ColumnStatistics createIntStats(int min, int max) {
          OrcProto.IntegerStatistics.Builder intStats = OrcProto.IntegerStatistics.newBuilder();
          intStats.setMinimum(min);
    @@ -289,7 +297,7 @@ public class TestRecordReaderImpl {
          return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build();
        }

    - private static OrcProto.ColumnStatistics createTimestampStats(int min, int max) {
    + private static OrcProto.ColumnStatistics createTimestampStats(long min, long max) {
          OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder();
          tsStats.setMinimum(min);
          tsStats.setMaximum(max);
    @@ -334,6 +342,30 @@ public class TestRecordReaderImpl {
        }

        @Test
    + public void testPredEvalWithBooleanStats() throws Exception {
    + PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null);
    + assertEquals(TruthValue.YES_NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
    + assertEquals(TruthValue.NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
    +
    + pred = TestSearchArgumentImpl.createPredicateLeaf(
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "true", null);
    + assertEquals(TruthValue.YES_NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
    + assertEquals(TruthValue.NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
    +
    + pred = TestSearchArgumentImpl.createPredicateLeaf(
    + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "hello", null);
    + assertEquals(TruthValue.NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null));
    + assertEquals(TruthValue.YES_NO,
    + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null));
    + }
    +
    + @Test
        public void testPredEvalWithIntStats() throws Exception {
          PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
              PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
    @@ -345,20 +377,27 @@ public class TestRecordReaderImpl {
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));

     + // Stats get converted to the column type. "15" is outside of "10" and "100"
          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
              PredicateLeaf.Type.STRING, "x", "15", null);
    - assertEquals(TruthValue.YES_NO,
    + assertEquals(TruthValue.NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));

     + // Integer stats will not be converted to date because of days/seconds/millis ambiguity
          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
    - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
    - assertEquals(TruthValue.NO,
    + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
    + assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));

          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
              PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
    +
    + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
    + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
    + assertEquals(TruthValue.YES_NO,
    + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null));
        }

        @Test
    @@ -373,20 +412,32 @@ public class TestRecordReaderImpl {
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));

     + // Stats get converted to the column type. "15.0" is outside of "10.0" and "100.0"
          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
              PredicateLeaf.Type.STRING, "x", "15", null);
    - assertEquals(TruthValue.YES_NO,
    + assertEquals(TruthValue.NO,
              RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));

    + // Double is not converted to date type because of days/seconds/millis ambiguity
          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
    - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
    - assertEquals(TruthValue.NO,
    + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
    + assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));

          pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
              PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
          assertEquals(TruthValue.YES_NO,
              RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
    +
    + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
    + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null);
    + assertEquals(TruthValue.YES_NO,
    + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
    +
    + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
    + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null);
    + assertEquals(TruthValue.NO,
    + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null));
        }

        @Test
@@ -406,27 +457,35 @@ public class TestRecordReaderImpl {
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
 
+    // IllegalArgumentException is thrown when converting String to Date, hence YES_NO
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(100), null);
-    assertEquals(TruthValue.NO,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
 
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
+
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null));
   }
 
   @Test
   public void testPredEvalWithDateStats() throws Exception {
     PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
         PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
-    assertEquals(TruthValue.NO,
+    // Date to Integer conversion is not possible.
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
+    // Date to Float conversion is also not possible.
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.NO,
+    assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
@@ -460,20 +519,30 @@ public class TestRecordReaderImpl {
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(150), null);
+        PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
     assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
+    // Date to Decimal conversion is also not possible.
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
+
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
     assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
 
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null));
   }
 
   @Test
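The date-stats hunk above turns on a unit mismatch rather than a range check: Hive's DateWritable(n) denotes n days since the epoch, while java.sql.Timestamp(n) denotes n milliseconds, so new Timestamp(15) sits far below day 10 (hence NO) and only the day-scaled 15L * 24L * 60L * 60L * 1000L lands inside the [10, 100]-day statistics (hence YES_NO). A small plain-Java sketch of the scaling (the class and variable names are illustrative):

import java.sql.Timestamp;
import java.util.concurrent.TimeUnit;

// DateWritable models a date as days since the UNIX epoch; a Timestamp only
// lines up with day-based statistics after its millis are scaled to days.
public class DateVersusTimestampScale {
  public static void main(String[] args) {
    long dayFifteen = TimeUnit.DAYS.toMillis(15);  // == 15L * 24 * 60 * 60 * 1000
    System.out.println(new Timestamp(15L));        // 15 ms after the epoch: below day 10
    System.out.println(new Timestamp(dayFifteen)); // the start of day 15: inside [10, 100]
    System.out.println(TimeUnit.MILLISECONDS.toDays(dayFifteen)); // back to 15 days
  }
}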
@@ -488,21 +557,79 @@ public class TestRecordReaderImpl {
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
 
+    // As strings, "15" is out of the range of "10.0" and "100.0"
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.STRING, "x", "15", null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    // Decimal to Date conversion is not possible.
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
     assertEquals(TruthValue.YES_NO,
         RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
 
     pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15), null);
+        PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null);
     assertEquals(TruthValue.NO,
         RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null));
+  }
+
+  @Test
+  public void testPredEvalWithTimestampStats() throws Exception {
+    PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf(
+        PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null);
+    assertEquals(TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+
+    pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+        PredicateLeaf.Type.FLOAT, "x", 15.0, null);
+    assertEquals(TruthValue.NO,
+        RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null));
+    assertEquals(TruthValue.YES_NO,
+