Repository: hive
Updated Branches:
   refs/heads/branch-2.0 971046a55 -> 4c8e47eee


http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/results/clientpositive/orc_llap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_llap.q.out b/ql/src/test/results/clientpositive/orc_llap.q.out
index c9bb3c8..742c7d8 100644
--- a/ql/src/test/results/clientpositive/orc_llap.q.out
+++ b/ql/src/test/results/clientpositive/orc_llap.q.out
@@ -698,17 +698,17 @@ STAGE PLANS:
            TableScan
              alias: orc_llap
              filterExpr: ((cint > 10) and cbigint is not null) (type: boolean)
- Statistics: Num rows: 99583 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 98779 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: ((cint > 10) and cbigint is not null) (type: boolean)
- Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
                  outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -768,17 +768,17 @@ STAGE PLANS:
            TableScan
              alias: orc_llap
              filterExpr: ((cint > 10) and cbigint is not null) (type: boolean)
- Statistics: Num rows: 4979 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4938 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: ((cint > 10) and cbigint is not null) (type: boolean)
- Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -838,17 +838,17 @@ STAGE PLANS:
            TableScan
              alias: orc_llap
              filterExpr: ((cint > 5) and (cint < 10)) (type: boolean)
- Statistics: Num rows: 15320 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15196 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: ((cint > 5) and (cint < 10)) (type: boolean)
- Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: cstring2 (type: string)
                  outputColumnNames: _col0
- Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
- Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -907,22 +907,22 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: orc_llap
- Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: cstring1 (type: string), cstring2 (type: string)
                outputColumnNames: cstring1, cstring2
- Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count()
                  keys: cstring1 (type: string), cstring2 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string), _col1 (type: string)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
        Execution mode: vectorized
        LLAP IO: all inputs
@@ -932,10 +932,10 @@ STAGE PLANS:
            keys: KEY._col0 (type: string), KEY._col1 (type: string)
            mode: mergepartial
            outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
- Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -999,14 +999,14 @@ STAGE PLANS:
            TableScan
              alias: o1
              filterExpr: (cbigint is not null and csmallint is not null) (type: boolean)
- Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: (cbigint is not null and csmallint is not null) (type: boolean)
- Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: csmallint (type: smallint), cstring1 (type: string)
                  outputColumnNames: _col0, _col2
- Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                  HashTable Sink Operator
                    keys:
                      0 _col0 (type: smallint)
@@ -1018,14 +1018,14 @@ STAGE PLANS:
            TableScan
              alias: o1
              filterExpr: (cbigint is not null and csmallint is not null) (type: boolean)
- Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
                predicate: (cbigint is not null and csmallint is not null) (type: boolean)
- Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: csmallint (type: smallint), cstring2 (type: string)
                  outputColumnNames: _col0, _col2
- Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -1033,14 +1033,14 @@ STAGE PLANS:
                      0 _col0 (type: smallint)
                      1 _col0 (type: smallint)
                    outputColumnNames: _col2, _col5
- Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col5 (type: string)
                      outputColumnNames: _col0, _col1
- Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
- Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/results/clientpositive/orc_merge11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out
index da608db..f0769d4 100644
--- a/ql/src/test/results/clientpositive/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/orc_merge11.q.out
@@ -96,22 +96,22 @@ File Statistics:
    Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -120,12 +120,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 11071 bytes
+File length: 6849 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --
@@ -155,22 +155,22 @@ File Statistics:
    Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -179,12 +179,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 11071 bytes
+File length: 6849 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --
@@ -244,22 +244,22 @@ File Statistics:
    Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -268,26 +268,26 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
- Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
- Stream: column 0 section ROW_INDEX start: 10733 length 17
- Stream: column 1 section ROW_INDEX start: 10750 length 85
- Stream: column 2 section ROW_INDEX start: 10835 length 87
- Stream: column 3 section ROW_INDEX start: 10922 length 111
- Stream: column 4 section ROW_INDEX start: 11033 length 108
- Stream: column 5 section ROW_INDEX start: 11141 length 101
- Stream: column 1 section DATA start: 11242 length 871
- Stream: column 2 section DATA start: 12113 length 362
- Stream: column 2 section LENGTH start: 12475 length 8
- Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
- Stream: column 3 section DATA start: 12506 length 5167
- Stream: column 4 section DATA start: 17673 length 524
- Stream: column 4 section SECONDARY start: 18197 length 118
- Stream: column 5 section DATA start: 18315 length 2913
- Stream: column 5 section SECONDARY start: 21228 length 118
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
+ Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
+ Stream: column 0 section ROW_INDEX start: 6511 length 17
+ Stream: column 1 section ROW_INDEX start: 6528 length 83
+ Stream: column 2 section ROW_INDEX start: 6611 length 81
+ Stream: column 3 section ROW_INDEX start: 6692 length 111
+ Stream: column 4 section ROW_INDEX start: 6803 length 110
+ Stream: column 5 section ROW_INDEX start: 6913 length 96
+ Stream: column 1 section DATA start: 7009 length 45
+ Stream: column 2 section DATA start: 7054 length 41
+ Stream: column 2 section LENGTH start: 7095 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
+ Stream: column 3 section DATA start: 7126 length 5167
+ Stream: column 4 section DATA start: 12293 length 524
+ Stream: column 4 section SECONDARY start: 12817 length 18
+ Stream: column 5 section DATA start: 12835 length 53
+ Stream: column 5 section SECONDARY start: 12888 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -296,12 +296,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 21814 bytes
+File length: 13369 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --

http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index 2b13dc6..1402a3f 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -813,11 +813,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int)
                      outputColumnNames: _col0
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -826,7 +826,7 @@ STAGE PLANS:
                          1 _col0 (type: int)
                        input vertices:
                          1 Map 3
- Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          aggregations: count()
                          mode: hash
@@ -934,11 +934,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int), cmodint (type: int)
                      outputColumnNames: _col0, _col1
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -950,7 +950,7 @@ STAGE PLANS:
                          1 _col0 (type: int)
                        input vertices:
                          1 Map 3
- Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          aggregations: count()
                          mode: hash
@@ -1058,11 +1058,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int), cmodint (type: int)
                      outputColumnNames: _col0, _col1
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -1074,7 +1074,7 @@ STAGE PLANS:
                          1 _col0 (type: int)
                        input vertices:
                          1 Map 3
- Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          aggregations: count()
                          mode: hash
@@ -1182,11 +1182,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int)
                      outputColumnNames: _col0
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -1198,7 +1198,7 @@ STAGE PLANS:
                          1 _col0 (type: int)
                        input vertices:
                          1 Map 3
- Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          aggregations: count()
                          mode: hash
@@ -1300,11 +1300,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int)
                      outputColumnNames: _col0
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        keys:
                          0 _col0 (type: int)
@@ -1323,11 +1323,11 @@ STAGE PLANS:
              Map Operator Tree:
                  TableScan
                    alias: s
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cmodtinyint (type: int), cmodint (type: int)
                      outputColumnNames: _col0, _col1
- Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -1337,7 +1337,7 @@ STAGE PLANS:
                        outputColumnNames: _col0
                        input vertices:
                          1 Map 3
- Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                        Map Join Operator
                          condition map:
                               Left Outer Join0 to 1
@@ -1346,7 +1346,7 @@ STAGE PLANS:
                            1 _col0 (type: int)
                          input vertices:
                            1 Map 4
- Statistics: Num rows: 7329 Data size: 3379 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE
                          Group By Operator
                            aggregations: count()
                            mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
index da608db..f0769d4 100644
--- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
@@ -96,22 +96,22 @@ File Statistics:
    Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -120,12 +120,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 11071 bytes
+File length: 6849 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --
@@ -155,22 +155,22 @@ File Statistics:
    Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -179,12 +179,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 11071 bytes
+File length: 6849 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --
@@ -244,22 +244,22 @@ File Statistics:
    Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

  Stripes:
- Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
+ Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
      Stream: column 0 section ROW_INDEX start: 3 length 17
- Stream: column 1 section ROW_INDEX start: 20 length 85
- Stream: column 2 section ROW_INDEX start: 105 length 87
- Stream: column 3 section ROW_INDEX start: 192 length 111
- Stream: column 4 section ROW_INDEX start: 303 length 108
- Stream: column 5 section ROW_INDEX start: 411 length 101
- Stream: column 1 section DATA start: 512 length 871
- Stream: column 2 section DATA start: 1383 length 362
- Stream: column 2 section LENGTH start: 1745 length 8
- Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
- Stream: column 3 section DATA start: 1776 length 5167
- Stream: column 4 section DATA start: 6943 length 524
- Stream: column 4 section SECONDARY start: 7467 length 118
- Stream: column 5 section DATA start: 7585 length 2913
- Stream: column 5 section SECONDARY start: 10498 length 118
+ Stream: column 1 section ROW_INDEX start: 20 length 83
+ Stream: column 2 section ROW_INDEX start: 103 length 81
+ Stream: column 3 section ROW_INDEX start: 184 length 111
+ Stream: column 4 section ROW_INDEX start: 295 length 110
+ Stream: column 5 section ROW_INDEX start: 405 length 96
+ Stream: column 1 section DATA start: 501 length 45
+ Stream: column 2 section DATA start: 546 length 41
+ Stream: column 2 section LENGTH start: 587 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 595 length 23
+ Stream: column 3 section DATA start: 618 length 5167
+ Stream: column 4 section DATA start: 5785 length 524
+ Stream: column 4 section SECONDARY start: 6309 length 18
+ Stream: column 5 section DATA start: 6327 length 53
+ Stream: column 5 section SECONDARY start: 6380 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -268,26 +268,26 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
- Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
- Stream: column 0 section ROW_INDEX start: 10733 length 17
- Stream: column 1 section ROW_INDEX start: 10750 length 85
- Stream: column 2 section ROW_INDEX start: 10835 length 87
- Stream: column 3 section ROW_INDEX start: 10922 length 111
- Stream: column 4 section ROW_INDEX start: 11033 length 108
- Stream: column 5 section ROW_INDEX start: 11141 length 101
- Stream: column 1 section DATA start: 11242 length 871
- Stream: column 2 section DATA start: 12113 length 362
- Stream: column 2 section LENGTH start: 12475 length 8
- Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
- Stream: column 3 section DATA start: 12506 length 5167
- Stream: column 4 section DATA start: 17673 length 524
- Stream: column 4 section SECONDARY start: 18197 length 118
- Stream: column 5 section DATA start: 18315 length 2913
- Stream: column 5 section SECONDARY start: 21228 length 118
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
+ Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
+ Stream: column 0 section ROW_INDEX start: 6511 length 17
+ Stream: column 1 section ROW_INDEX start: 6528 length 83
+ Stream: column 2 section ROW_INDEX start: 6611 length 81
+ Stream: column 3 section ROW_INDEX start: 6692 length 111
+ Stream: column 4 section ROW_INDEX start: 6803 length 110
+ Stream: column 5 section ROW_INDEX start: 6913 length 96
+ Stream: column 1 section DATA start: 7009 length 45
+ Stream: column 2 section DATA start: 7054 length 41
+ Stream: column 2 section LENGTH start: 7095 length 8
+ Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
+ Stream: column 3 section DATA start: 7126 length 5167
+ Stream: column 4 section DATA start: 12293 length 524
+ Stream: column 4 section SECONDARY start: 12817 length 18
+ Stream: column 5 section DATA start: 12835 length 53
+ Stream: column 5 section SECONDARY start: 12888 length 18
      Encoding column 0: DIRECT
      Encoding column 1: DIRECT_V2
      Encoding column 2: DICTIONARY_V2[6]
@@ -296,12 +296,12 @@ Stripes:
      Encoding column 5: DIRECT_V2
      Row group indices for column 1:
        Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
- Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
- Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
- Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
- Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
+ Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
+ Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
+ Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
+ Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

-File length: 21814 bytes
+File length: 13369 bytes
  Padding length: 0 bytes
  Padding ratio: 0%
  -- END ORC FILE DUMP --

  • Prasanthj at Dec 3, 2015 at 6:21 am
    HIVE-12537: RLEv2 doesn't seem to work (Prasanth Jayachandran reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4c8e47ee
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4c8e47ee
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4c8e47ee

    Branch: refs/heads/branch-2.0
    Commit: 4c8e47eeec485c8065b58bf3e0955172a41eea2d
    Parents: 971046a
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 3 00:21:21 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 3 00:21:21 2015 -0600

    ----------------------------------------------------------------------
      .../ql/io/orc/RunLengthIntegerWriterV2.java | 43 +-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 4 +-
      .../ql/io/orc/TestOrcRawRecordMerger.java.orig | 1150 ------------------
      .../apache/hadoop/hive/ql/io/orc/TestRLEv2.java | 297 +++++
      .../hive/ql/io/orc/TestVectorOrcFile.java | 4 +-
      ql/src/test/resources/orc-file-has-null.out | 94 +-
      .../results/clientpositive/orc_file_dump.q.out | 156 +--
      .../test/results/clientpositive/orc_llap.q.out | 54 +-
      .../results/clientpositive/orc_merge11.q.out | 160 +--
      .../spark/vector_outer_join5.q.out | 36 +-
      .../clientpositive/tez/orc_merge11.q.out | 160 +--
      11 files changed, 652 insertions(+), 1506 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    index 6344a66..95f8cc8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    @@ -118,8 +118,8 @@ import java.io.IOException;
       * <li>8 bits for lower run length bits</li>
       * </ul>
       * </li>
    - * <li>Base value - encoded as varint</li>
    - * <li>Delta base - encoded as varint</li>
    + * <li>Base value - zigzag encoded value written as varint</li>
    + * <li>Delta base - zigzag encoded value written as varint</li>
       * <li>Delta blob - only positive values. monotonicity and orderness are decided
       * based on the sign of the base value and delta base</li>
       * </ul>
    @@ -472,29 +472,28 @@ class RunLengthIntegerWriterV2 implements IntegerWriter {
          // invariant - subtracting any number from any other in the literals after
          // this point won't overflow

    + // if min is equal to max then the delta is 0, this condition happens for
    + // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    + if (min == max) {
    + assert isFixedDelta : min + "==" + max +
    + ", isFixedDelta cannot be false";
    + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    + fixedDelta = 0;
    + encoding = EncodingType.DELTA;
    + return;
    + }
    +
    + if (isFixedDelta) {
    + assert currDelta == initialDelta
    + : "currDelta should be equal to initialDelta for fixed delta encoding";
    + encoding = EncodingType.DELTA;
    + fixedDelta = currDelta;
    + return;
    + }
    +
          // if initialDelta is 0 then we cannot delta encode as we cannot identify
          // the sign of deltas (increasing or decreasing)
          if (initialDelta != 0) {
    -
    - // if min is equal to max then the delta is 0, this condition happens for
    - // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    - if (min == max) {
    - assert isFixedDelta : min + "==" + max +
    - ", isFixedDelta cannot be false";
    - assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    - fixedDelta = 0;
    - encoding = EncodingType.DELTA;
    - return;
    - }
    -
    - if (isFixedDelta) {
    - assert currDelta == initialDelta
    - : "currDelta should be equal to initialDelta for fixed delta encoding";
    - encoding = EncodingType.DELTA;
    - fixedDelta = currDelta;
    - return;
    - }
    -
            // stores the number of bits required for packing delta blob in
            // delta encoding
            bitsDeltaMax = utils.findClosestNumBits(deltaMax);
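
    As background for the javadoc and encoder changes above: in RLEv2 DELTA
    encoding the base value and delta base are zigzag encoded and written as
    varints, and a run of identical values is a fixed-delta run with delta 0,
    which the patch now detects before looking at initialDelta. The following
    is a minimal standalone sketch of those two ideas (zigzag plus base-128
    varint output, and fixed-delta detection over a run of longs); it is an
    illustration only, not the actual Hive writer, and the class and helper
    names are made up for this example.

        import java.io.ByteArrayOutputStream;

        public class DeltaEncodingSketch {
          // Zigzag: map signed values to unsigned so small magnitudes
          // (positive or negative) need few varint bytes.
          static long zigzag(long n) {
            return (n << 1) ^ (n >> 63);
          }

          // Base-128 varint: 7 payload bits per byte, high bit set on
          // every byte except the last.
          static void writeVarint(ByteArrayOutputStream out, long v) {
            while ((v & ~0x7fL) != 0) {
              out.write((int) ((v & 0x7f) | 0x80));
              v >>>= 7;
            }
            out.write((int) v);
          }

          // A run is "fixed delta" when all consecutive differences are
          // equal; a run of identical values is the delta == 0 case.
          static boolean isFixedDelta(long[] run) {
            if (run.length < 2) {
              return true;
            }
            long delta = run[1] - run[0];
            for (int i = 2; i < run.length; i++) {
              if (run[i] - run[i - 1] != delta) {
                return false;
              }
            }
            return true;
          }

          public static void main(String[] args) {
            long[] constantRun = {7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
            ByteArrayOutputStream header = new ByteArrayOutputStream();
            writeVarint(header, zigzag(constantRun[0])); // base value
            writeVarint(header, zigzag(0));              // delta base, 0 for a constant run
            System.out.println("fixed delta: " + isFixedDelta(constantRun)
                + ", header bytes: " + header.size());
          }
        }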

    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    index 146f5b1..2992f3c 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    @@ -1895,9 +1895,9 @@ public class TestOrcFile {
                stripe.getDataLength() < 5000);
          }
          // with HIVE-7832, the dictionaries will be disabled after writing the first
    - // stripe as there are too many distinct values. Hence only 4 stripes as
    + // stripe as there are too many distinct values. Hence only 3 stripes as
          // compared to 25 stripes in version 0.11 (above test case)
    - assertEquals(4, i);
    + assertEquals(3, i);
          assertEquals(2500, reader.getNumberOfRows());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    deleted file mode 100644
    index 15ee24c..0000000
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    +++ /dev/null
    @@ -1,1150 +0,0 @@
    -/**
    - * Licensed to the Apache Software Foundation (ASF) under one
    - * or more contributor license agreements. See the NOTICE file
    - * distributed with this work for additional information
    - * regarding copyright ownership. The ASF licenses this file
    - * to you under the Apache License, Version 2.0 (the
    - * "License"); you may not use this file except in compliance
    - * with the License. You may obtain a copy of the License at
    - *
    - * http://www.apache.org/licenses/LICENSE-2.0
    - *
    - * Unless required by applicable law or agreed to in writing, software
    - * distributed under the License is distributed on an "AS IS" BASIS,
    - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    - * See the License for the specific language governing permissions and
    - * limitations under the License.
    - */
    -
    -package org.apache.hadoop.hive.ql.io.orc;
    -
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.hadoop.conf.Configuration;
    -import org.apache.hadoop.fs.FileSystem;
    -import org.apache.hadoop.fs.Path;
    -import org.apache.hadoop.hive.common.ValidTxnList;
    -import org.apache.hadoop.hive.common.ValidReadTxnList;
    -import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
    -import org.apache.hadoop.hive.ql.io.AcidUtils;
    -import org.apache.hadoop.hive.ql.io.RecordIdentifier;
    -import org.apache.hadoop.hive.ql.io.RecordUpdater;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair;
    -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    -import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    -import org.apache.hadoop.io.IntWritable;
    -import org.apache.hadoop.io.LongWritable;
    -import org.apache.hadoop.io.NullWritable;
    -import org.apache.hadoop.io.Text;
    -import org.apache.hadoop.mapred.InputFormat;
    -import org.apache.hadoop.mapred.InputSplit;
    -import org.apache.hadoop.mapred.JobConf;
    -import org.apache.hadoop.mapred.Reporter;
    -import org.junit.Test;
    -import org.mockito.MockSettings;
    -import org.mockito.Mockito;
    -
    -import java.io.File;
    -import java.io.IOException;
    -import java.nio.ByteBuffer;
    -import java.util.ArrayList;
    -import java.util.List;
    -
    -import static org.junit.Assert.assertEquals;
    -import static org.junit.Assert.assertTrue;
    -import static org.junit.Assert.assertFalse;
    -import static org.junit.Assert.assertNull;
    -
    -public class TestOrcRawRecordMerger {
    -
    - private static final Logger LOG = LoggerFactory.getLogger(TestOrcRawRecordMerger.class);
    -//todo: why is statementId -1?
    - @Test
    - public void testOrdering() throws Exception {
    - ReaderKey left = new ReaderKey(100, 200, 1200, 300);
    - ReaderKey right = new ReaderKey();
    - right.setValues(100, 200, 1000, 200,1);
    - assertTrue(right.compareTo(left) < 0);
    - assertTrue(left.compareTo(right) > 0);
    - assertEquals(false, left.equals(right));
    - left.set(right);
    - assertTrue(right.compareTo(left) == 0);
    - assertEquals(true, right.equals(left));
    - right.setRowId(2000);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 4,-1);
    - right.setValues(100, 2, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 4,-1);
    - right.setValues(1, 100, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 100,-1);
    - right.setValues(1, 2, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    -
    - // ensure that we are consistent when comparing to the base class
    - RecordIdentifier ri = new RecordIdentifier(1, 2, 3);
    - assertEquals(1, ri.compareTo(left));
    - assertEquals(-1, left.compareTo(ri));
    - assertEquals(false, ri.equals(left));
    - assertEquals(false, left.equals(ri));
    - }
    -
    - private static void setRow(OrcStruct event,
    - int operation,
    - long originalTransaction,
    - int bucket,
    - long rowId,
    - long currentTransaction,
    - String value) {
    - event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation));
    - event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
    - new LongWritable(originalTransaction));
    - event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket));
    - event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId));
    - event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
    - new LongWritable(currentTransaction));
    - OrcStruct row = new OrcStruct(1);
    - row.setFieldValue(0, new Text(value));
    - event.setFieldValue(OrcRecordUpdater.ROW, row);
    - }
    -
    - private static String value(OrcStruct event) {
    - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString();
    - }
    -
    - private List<StripeInformation> createStripes(long... rowCounts) {
    - long offset = 0;
    - List<StripeInformation> result =
    - new ArrayList<StripeInformation>(rowCounts.length);
    - for(long count: rowCounts) {
    - OrcProto.StripeInformation.Builder stripe =
    - OrcProto.StripeInformation.newBuilder();
    - stripe.setDataLength(800).setIndexLength(100).setFooterLength(100)
    - .setNumberOfRows(count).setOffset(offset);
    - offset += 1000;
    - result.add(new ReaderImpl.StripeInformationImpl(stripe.build()));
    - }
    - return result;
    - }
    -
    - // can add .verboseLogging() to cause Mockito to log invocations
    - private final MockSettings settings = Mockito.withSettings();
    - private final Path tmpDir = new Path(System.getProperty("test.tmp.dir",
    - "target" + File.separator + "test" + File.separator + "tmp"));
    -
    - private Reader createMockReader() throws IOException {
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
    - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
    - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
    - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
    - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    -
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    -
    - Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
    -
    - Mockito.when(recordReader.next(null)).thenReturn(row1);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row4);
    - Mockito.when(recordReader.next(row4)).thenReturn(row5);
    -
    - return reader;
    - }
    -
    - @Test
    - public void testReaderPair() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockReader();
    - RecordIdentifier minKey = new RecordIdentifier(10, 20, 30);
    - RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60);
    - ReaderPair pair = new ReaderPair(key, reader, 20, minKey, maxKey,
    - new Reader.Options(), 0);
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(40, key.getRowId());
    - assertEquals(120, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(60, key.getRowId());
    - assertEquals(130, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - @Test
    - public void testReaderPairNoMin() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockReader();
    -
    - ReaderPair pair = new ReaderPair(key, reader, 20, null, null,
    - new Reader.Options(), 0);
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(20, key.getRowId());
    - assertEquals(100, key.getCurrentTransactionId());
    - assertEquals("first", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(30, key.getRowId());
    - assertEquals(110, key.getCurrentTransactionId());
    - assertEquals("second", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(40, key.getRowId());
    - assertEquals(120, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(60, key.getRowId());
    - assertEquals(130, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(61, key.getRowId());
    - assertEquals(140, key.getCurrentTransactionId());
    - assertEquals("fifth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - private static OrcStruct createOriginalRow(String value) {
    - OrcStruct result = new OrcStruct(1);
    - result.setFieldValue(0, new Text(value));
    - return result;
    - }
    -
    - private Reader createMockOriginalReader() throws IOException {
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    - OrcStruct row1 = createOriginalRow("first");
    - OrcStruct row2 = createOriginalRow("second");
    - OrcStruct row3 = createOriginalRow("third");
    - OrcStruct row4 = createOriginalRow("fourth");
    - OrcStruct row5 = createOriginalRow("fifth");
    -
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    - Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L);
    - Mockito.when(recordReader.next(null)).thenReturn(row1);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row4);
    - Mockito.when(recordReader.next(row4)).thenReturn(row5);
    - return reader;
    - }
    -
    - @Test
    - public void testOriginalReaderPair() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockOriginalReader();
    - RecordIdentifier minKey = new RecordIdentifier(0, 10, 1);
    - RecordIdentifier maxKey = new RecordIdentifier(0, 10, 3);
    - boolean[] includes = new boolean[]{true, true};
    - ReaderPair pair = new OriginalReaderPair(key, reader, 10, minKey, maxKey,
    - new Reader.Options().include(includes));
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(2, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(3, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - private static ValidTxnList createMaximalTxnList() {
    - return new ValidReadTxnList(Long.MAX_VALUE + ":");
    - }
    -
    - @Test
    - public void testOriginalReaderPairNoMin() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockOriginalReader();
    - ReaderPair pair = new OriginalReaderPair(key, reader, 10, null, null,
    - new Reader.Options());
    - assertEquals("first", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(0, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("second", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(1, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("third", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(2, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("fourth", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(3, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("fifth", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(4, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(pair.recordReader).close();
    - }
    -
    - @Test
    - public void testNewBase() throws Exception {
    - Configuration conf = new Configuration();
    - conf.set("columns", "col1");
    - conf.set("columns.types", "string");
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    -
    - List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    - OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder();
    - typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1)
    - .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5)
    - .addSubtypes(6);
    - types.add(typeBuilder.build());
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - typeBuilder.clearSubtypes();
    - typeBuilder.addSubtypes(7);
    - types.add(typeBuilder.build());
    -
    - Mockito.when(reader.getTypes()).thenReturn(types);
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    -
    - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
    - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
    - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
    - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
    - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
    -
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    -
    - Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
    -
    - Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row5);
    -
    - Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
    - .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61"
    - .getBytes("UTF-8")));
    - Mockito.when(reader.getStripes())
    - .thenReturn(createStripes(2, 2, 1));
    -
    - OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader,
    - false, 10, createMaximalTxnList(),
    - new Reader.Options().range(1000, 1000), null);
    - RecordReader rr = merger.getCurrentReader().recordReader;
    - assertEquals(0, merger.getOtherReaders().size());
    -
    - assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey());
    - assertEquals(new RecordIdentifier(40, 50, 60), merger.getMaxKey());
    - RecordIdentifier id = merger.createKey();
    - OrcStruct event = merger.createValue();
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(10, id.getTransactionId());
    - assertEquals(20, id.getBucketId());
    - assertEquals(40, id.getRowId());
    - assertEquals("third", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(40, id.getTransactionId());
    - assertEquals(50, id.getBucketId());
    - assertEquals(60, id.getRowId());
    - assertEquals("fourth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - assertEquals(1.0, merger.getProgress(), 0.01);
    - merger.close();
    - Mockito.verify(rr).close();
    - Mockito.verify(rr).getProgress();
    -
    - StructObjectInspector eventObjectInspector =
    - (StructObjectInspector) merger.getObjectInspector();
    - List<? extends StructField> fields =
    - eventObjectInspector.getAllStructFieldRefs();
    - assertEquals(OrcRecordUpdater.FIELDS, fields.size());
    - assertEquals("operation",
    - fields.get(OrcRecordUpdater.OPERATION).getFieldName());
    - assertEquals("currentTransaction",
    - fields.get(OrcRecordUpdater.CURRENT_TRANSACTION).getFieldName());
    - assertEquals("originalTransaction",
    - fields.get(OrcRecordUpdater.ORIGINAL_TRANSACTION).getFieldName());
    - assertEquals("bucket",
    - fields.get(OrcRecordUpdater.BUCKET).getFieldName());
    - assertEquals("rowId",
    - fields.get(OrcRecordUpdater.ROW_ID).getFieldName());
    - StructObjectInspector rowObjectInspector =
    - (StructObjectInspector) fields.get(OrcRecordUpdater.ROW)
    - .getFieldObjectInspector();
    - assertEquals("col1",
    - rowObjectInspector.getAllStructFieldRefs().get(0).getFieldName());
    - }
    -
    - static class MyRow {
    - Text col1;
    - RecordIdentifier ROW__ID;
    -
    - MyRow(String val) {
    - col1 = new Text(val);
    - }
    -
    - MyRow(String val, long rowId, long origTxn, int bucket) {
    - col1 = new Text(val);
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    - }
    -
    - static String getValue(OrcStruct event) {
    - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString();
    - }
    -
    - @Test
    - public void testEmpty() throws Exception {
    - final int BUCKET = 0;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the empty base
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .inspector(inspector).bucket(BUCKET).writingBase(true)
    - .maximumTransactionId(100).finalDestination(root);
    - of.getRecordUpdater(root, options).close(false);
    -
    - ValidTxnList txnList = new ValidReadTxnList("200:");
    - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
    -
    - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(),
    - BUCKET);
    - Reader baseReader = OrcFile.createReader(basePath,
    - OrcFile.readerOptions(conf));
    - OrcRawRecordMerger merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - RecordIdentifier key = merger.createKey();
    - OrcStruct value = merger.createValue();
    - assertEquals(false, merger.next(key, value));
    - }
    -
    - /**
    - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is
    - * a base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testNewBaseAndDelta() throws Exception {
    - testNewBaseAndDelta(false);
    - testNewBaseAndDelta(true);
    - }
    - private void testNewBaseAndDelta(boolean use130Format) throws Exception {
    - final int BUCKET = 10;
    - String[] values = new String[]{"first", "second", "third", "fourth",
    - "fifth", "sixth", "seventh", "eighth",
    - "ninth", "tenth"};
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .inspector(inspector).bucket(BUCKET).finalDestination(root);
    - if(!use130Format) {
    - options.statementId(-1);
    - }
    - RecordUpdater ru = of.getRecordUpdater(root,
    - options.writingBase(true).maximumTransactionId(100));
    - for(String v: values) {
    - ru.insert(0, new MyRow(v));
    - }
    - ru.close(false);
    -
    - // write a delta
    - ru = of.getRecordUpdater(root, options.writingBase(false)
    - .minimumTransactionId(200).maximumTransactionId(200).recordIdColumn(1));
    - ru.update(200, new MyRow("update 1", 0, 0, BUCKET));
    - ru.update(200, new MyRow("update 2", 2, 0, BUCKET));
    - ru.update(200, new MyRow("update 3", 3, 0, BUCKET));
    - ru.delete(200, new MyRow("", 7, 0, BUCKET));
    - ru.delete(200, new MyRow("", 8, 0, BUCKET));
    - ru.close(false);
    -
    - ValidTxnList txnList = new ValidReadTxnList("200:");
    - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
    -
    - assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
    - assertEquals(new Path(root, use130Format ?
    - AcidUtils.deltaSubdir(200,200,0) : AcidUtils.deltaSubdir(200,200)),
    - directory.getCurrentDirectories().get(0).getPath());
    -
    - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(),
    - BUCKET);
    - Reader baseReader = OrcFile.createReader(basePath,
    - OrcFile.readerOptions(conf));
    - OrcRawRecordMerger merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - assertEquals(null, merger.getMinKey());
    - assertEquals(null, merger.getMaxKey());
    - RecordIdentifier id = merger.createKey();
    - OrcStruct event = merger.createValue();
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id);
    - assertEquals("update 1", getValue(event));
    - assertFalse(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id);
    - assertEquals("second", getValue(event));
    - assertFalse(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id);
    - assertEquals("update 2", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id);
    - assertEquals("update 3", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id);
    - assertEquals("fifth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id);
    - assertEquals("sixth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id);
    - assertEquals("seventh", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    - assertTrue(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id);
    - assertEquals("tenth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    -
    - // make a merger that doesn't collapse events
    - merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id);
    - assertEquals("update 1", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 0), id);
    - assertEquals("first", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id);
    - assertEquals("second", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id);
    - assertEquals("update 2", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 0), id);
    - assertEquals("third", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id);
    - assertEquals("update 3", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 0), id);
    - assertEquals("fourth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id);
    - assertEquals("fifth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id);
    - assertEquals("sixth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id);
    - assertEquals("seventh", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 0), id);
    - assertEquals("eighth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 0), id);
    - assertEquals("ninth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id);
    - assertEquals("tenth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    -
    - // try ignoring the 200 transaction and make sure it works still
    - ValidTxnList txns = new ValidReadTxnList("2000:200");
    - merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - txns, new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - for(int i=0; i < values.length; ++i) {
    - assertEquals(true, merger.next(id, event));
    - LOG.info("id = " + id + "event = " + event);
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, i, 0), id);
    - assertEquals(values[i], getValue(event));
    - }
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    - }
    -
    - static class BigRow {
    - int myint;
    - long mylong;
    - Text mytext;
    - float myfloat;
    - double mydouble;
    - RecordIdentifier ROW__ID;
    -
    - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble) {
    - this.myint = myint;
    - this.mylong = mylong;
    - this.mytext = new Text(mytext);
    - this.myfloat = myfloat;
    - this.mydouble = mydouble;
    - ROW__ID = null;
    - }
    -
    - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble,
    - long rowId, long origTxn, int bucket) {
    - this.myint = myint;
    - this.mylong = mylong;
    - this.mytext = new Text(mytext);
    - this.myfloat = myfloat;
    - this.mydouble = mydouble;
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    -
    - BigRow(long rowId, long origTxn, int bucket) {
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    - }
    -
    - /**
    - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is
    - * a base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderOldBaseAndDelta() throws Exception {
    - final int BUCKET = 10;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testOldBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - MemoryManager mgr = new MemoryManager(conf){
    - int rowsAddedSinceCheck = 0;
    -
    - @Override
    - synchronized void addedRow(int rows) throws IOException {
    - rowsAddedSinceCheck += rows;
    - if (rowsAddedSinceCheck >= 2) {
    - notifyWriters();
    - rowsAddedSinceCheck = 0;
    - }
    - }
    - };
    - // make 5 stripes with 2 rows each
    - Writer writer = OrcFile.createWriter(new Path(root, "0000010_0"),
    - OrcFile.writerOptions(conf).inspector(inspector).fileSystem(fs)
    - .blockPadding(false).bufferSize(10000).compress(CompressionKind.NONE)
    - .stripeSize(1).memory(mgr).version(OrcFile.Version.V_0_11));
    - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
    - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"};
    - for(int i=0; i < values.length; ++i) {
    - writer.addRow(new BigRow(i, i, values[i], i, i));
    - }
    - writer.close();
    -
    - // write a delta
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs).recordIdColumn(5).finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - values = new String[]{"0.0", null, null, "1.1", null, null, null,
    - "ignore.7"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(9, 0, BUCKET));
    - ru.close(false);
    -
    - // write a delta
    - options = options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(8, 0, BUCKET));
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(5, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    -
    - // loop through the 5 splits and read each
    - for(int i=0; i < 4; ++i) {
    - System.out.println("starting split " + i);
    - rr = inf.getRecordReader(splits[i], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    -
- // there should be exactly two rows per split
    - for(int j=0; j < 2; ++j) {
    - System.out.println("i = " + i + ", j = " + j);
    - assertEquals(true, rr.next(key, value));
    - System.out.println("record = " + value);
    - assertEquals(i + "." + j, value.getFieldValue(2).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    - rr = inf.getRecordReader(splits[4], job, Reporter.NULL);
    - assertEquals(false, rr.next(rr.createKey(), rr.createValue()));
    - }
    -
    - /**
    - * Test the RecordReader when there is a new base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderNewBaseAndDelta() throws Exception {
    - final int BUCKET = 11;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testRecordReaderNewBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - MemoryManager mgr = new MemoryManager(conf){
    - int rowsAddedSinceCheck = 0;
    -
    - @Override
    - synchronized void addedRow(int rows) throws IOException {
    - rowsAddedSinceCheck += rows;
    - if (rowsAddedSinceCheck >= 2) {
    - notifyWriters();
    - rowsAddedSinceCheck = 0;
    - }
    - }
    - };
    -
    - // make 5 stripes with 2 rows each
    - OrcRecordUpdater.OrcOptions options = (OrcRecordUpdater.OrcOptions)
    - new OrcRecordUpdater.OrcOptions(conf)
    - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs);
    - options.orcOptions(OrcFile.writerOptions(conf)
    - .stripeSize(1).blockPadding(false).compress(CompressionKind.NONE)
    - .memory(mgr));
    - options.finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
    - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(0, new BigRow(i, i, values[i], i, i));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.writingBase(false).minimumTransactionId(1).maximumTransactionId(1).recordIdColumn(5);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"0.0", null, null, "1.1", null, null, null,
    - "ignore.7"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(9, 0, BUCKET));
    - ru.close(false);
    -
    - // write a delta
    - options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(8, 0, BUCKET));
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(5, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    -
    - // loop through the 5 splits and read each
    - for(int i=0; i < 4; ++i) {
    - System.out.println("starting split " + i);
    - rr = inf.getRecordReader(splits[i], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    -
- // there should be exactly two rows per split
    - for(int j=0; j < 2; ++j) {
    - System.out.println("i = " + i + ", j = " + j);
    - assertEquals(true, rr.next(key, value));
    - System.out.println("record = " + value);
    - assertEquals(i + "." + j, value.getFieldValue(2).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    - rr = inf.getRecordReader(splits[4], job, Reporter.NULL);
    - assertEquals(false, rr.next(rr.createKey(), rr.createValue()));
    - }
    -
    - /**
    - * Test the RecordReader when there is a new base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderDelta() throws Exception {
    - final int BUCKET = 0;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testRecordReaderDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write a delta
    - AcidOutputFormat.Options options =
    - new AcidOutputFormat.Options(conf)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs)
    - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
    - .finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values = new String[]{"a", "b", "c", "d", "e"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(1, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"f", "g", "h", "i", "j"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(2, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - job.set("bucket_count", "1");
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(1, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    - rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    - values = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"};
    - OrcStruct row = rr.createValue();
    - for(int i = 0; i < values.length; ++i) {
    - System.out.println("Checking " + i);
    - assertEquals(true, rr.next(NullWritable.get(), row));
    - assertEquals(values[i], row.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(NullWritable.get(), row));
    - }
    -
    - /**
    - * Test the RecordReader when the delta has been flushed, but not closed.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderIncompleteDelta() throws Exception {
    - testRecordReaderIncompleteDelta(false);
    - testRecordReaderIncompleteDelta(true);
    - }
    - /**
    - *
    - * @param use130Format true means use delta_0001_0001_0000 format, else delta_0001_00001
    - */
    - private void testRecordReaderIncompleteDelta(boolean use130Format) throws Exception {
    - final int BUCKET = 1;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf).getRaw();
    - Path root = new Path(tmpDir, "testRecordReaderIncompleteDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write a base
    - AcidOutputFormat.Options options =
    - new AcidOutputFormat.Options(conf)
    - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs).finalDestination(root);
    - if(!use130Format) {
    - options.statementId(-1);
    - }
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values= new String[]{"1", "2", "3", "4", "5"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(0, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.writingBase(false).minimumTransactionId(10)
    - .maximumTransactionId(19);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"6", "7", "8"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(1, new MyRow(values[i]));
    - }
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.input.dir", root.toString());
    - job.set("bucket_count", "2");
    -
    - // read the keys before the delta is flushed
    - InputSplit[] splits = inf.getSplits(job, 1);
    - assertEquals(2, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
    - inf.getRecordReader(splits[0], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    - System.out.println("Looking at split " + splits[0]);
    - for(int i=1; i < 6; ++i) {
    - System.out.println("Checking row " + i);
    - assertEquals(true, rr.next(key, value));
    - assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    -
    - ru.flush();
    - ru.flush();
    - values = new String[]{"9", "10"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(3, new MyRow(values[i]));
    - }
    - ru.flush();
    -
    - splits = inf.getSplits(job, 1);
    - assertEquals(2, splits.length);
    - rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    - Path sideFile = new Path(root + "/" + (use130Format ? AcidUtils.deltaSubdir(10,19,0) :
    - AcidUtils.deltaSubdir(10,19)) + "/bucket_00001_flush_length");
    - assertEquals(true, fs.exists(sideFile));
    - assertEquals(24, fs.getFileStatus(sideFile).getLen());
    -
    - for(int i=1; i < 11; ++i) {
    - assertEquals(true, rr.next(key, value));
    - assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    -
    -}

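    As an editorial aside (not part of this patch), the removed testOrdering() above pins down the sort order the merger depends on: keys compare ascending on (originalTransaction, bucket, rowId), with ties broken by currentTransaction descending so the most recent event for a row is seen first. A minimal, self-contained sketch of that ordering, using hypothetical names in place of OrcRawRecordMerger.ReaderKey:

    import java.util.Comparator;

    // Illustrative stand-in for the ordering exercised by the removed testOrdering();
    // names and field layout here are hypothetical, not the real ReaderKey.
    public class KeyOrderSketch {
      final long originalTransaction;
      final int bucket;
      final long rowId;
      final long currentTransaction;

      KeyOrderSketch(long originalTransaction, int bucket, long rowId, long currentTransaction) {
        this.originalTransaction = originalTransaction;
        this.bucket = bucket;
        this.rowId = rowId;
        this.currentTransaction = currentTransaction;
      }

      // ascending (originalTransaction, bucket, rowId); currentTransaction descending,
      // so (1, 2, 3, 100) sorts before (1, 2, 3, 4), as the removed test asserts
      static final Comparator<KeyOrderSketch> ORDER =
          Comparator.comparingLong((KeyOrderSketch k) -> k.originalTransaction)
              .thenComparingInt(k -> k.bucket)
              .thenComparingLong(k -> k.rowId)
              .thenComparing(
                  Comparator.comparingLong((KeyOrderSketch k) -> k.currentTransaction).reversed());

      public static void main(String[] args) {
        KeyOrderSketch newer = new KeyOrderSketch(1, 2, 3, 100);
        KeyOrderSketch older = new KeyOrderSketch(1, 2, 3, 4);
        // prints true: the key carrying the higher currentTransaction sorts first
        System.out.println(ORDER.compare(newer, older) < 0);
      }
    }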
    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    new file mode 100644
    index 0000000..1a3559e
    --- /dev/null
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    @@ -0,0 +1,297 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +import java.io.ByteArrayOutputStream;
    +import java.io.File;
    +import java.io.PrintStream;
    +import java.util.Random;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +public class TestRLEv2 {
    + Path workDir = new Path(System.getProperty("test.tmp.dir",
    + "target" + File.separator + "test" + File.separator + "tmp"));
    + Path testFilePath;
    + Configuration conf;
    + FileSystem fs;
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new Configuration();
    + fs = FileSystem.getLocal(conf);
    + testFilePath = new Path(workDir, "TestRLEv2." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + @Test
    + public void testFixedDeltaZero() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(123);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
    + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOne() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 1 byte delta (delta = 1). In total, 4 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOneDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(512 - (i % 512));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
+ // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 512, zigzag encoded varint)
+ // and 1 byte delta (delta = 1). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLarge() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512 + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLargeDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow((512 - i % 512) + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
+ // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 512, zigzag encoded varint)
+ // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testShortRepeat() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5; ++i) {
    + w.addRow(10);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 1 byte header + 1 byte value
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testDeltaUnknownSign() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + w.addRow(0);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(i);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
+ // monotonicity is undetermined for the sequence 0,0,1,2,3,...,510, so DIRECT encoding
+ // is used: 2 bytes of header plus 640 bytes of data (512 values at a fixed width of
+ // 10 bits each, 5120 bits / 8 = 640 bytes), 642 bytes in total
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testPatchedBase() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + Random rand = new Random(123);
    + w.addRow(10000000);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(rand.nextInt(i+1));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // use PATCHED_BASE encoding
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
    + System.setOut(origOut);
    + }
    +}

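    As an editorial aside (not part of this patch), the DATA stream lengths asserted by the new TestRLEv2 cases follow directly from the per-run byte counts spelled out in the comments above. A small stand-alone sketch of that arithmetic, with hypothetical helper names rather than any ORC API:

    // Illustrative only: reproduces the expected stream sizes from the test comments.
    public class Rlev2SizeSketch {
      // FIXED_DELTA runs: each run of `runLength` values costs header + base + delta bytes.
      static int fixedDeltaStreamBytes(int totalValues, int runLength,
                                       int headerBytes, int baseBytes, int deltaBytes) {
        int runs = totalValues / runLength;                   // e.g. 5120 / 512 = 10 runs
        return runs * (headerBytes + baseBytes + deltaBytes);
      }

      // DIRECT run: header plus the values bit-packed at a fixed width.
      static int directStreamBytes(int values, int bitsPerValue, int headerBytes) {
        return headerBytes + (values * bitsPerValue) / 8;     // 512 * 10 bits / 8 = 640 bytes
      }

      public static void main(String[] args) {
        System.out.println(fixedDeltaStreamBytes(5120, 512, 2, 2, 1)); // testFixedDeltaZero -> 50
        System.out.println(fixedDeltaStreamBytes(5120, 512, 2, 1, 1)); // testFixedDeltaOne -> 40
        System.out.println(fixedDeltaStreamBytes(5120, 512, 2, 2, 2)); // testFixedDeltaLargeDescending -> 60
        System.out.println(directStreamBytes(512, 10, 2));             // testDeltaUnknownSign -> 642
      }
    }

    Running the sketch prints 50, 40, 60 and 642, matching the "length ..." values the corresponding tests look for in the FileDump output.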
    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    index 134f78c..070e2ab 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    @@ -1984,9 +1984,9 @@ public class TestVectorOrcFile {
                stripe.getDataLength() < 5000);
          }
          // with HIVE-7832, the dictionaries will be disabled after writing the first
    - // stripe as there are too many distinct values. Hence only 4 stripes as
    + // stripe as there are too many distinct values. Hence only 3 stripes as
          // compared to 25 stripes in version 0.11 (above test case)
    - assertEquals(4, i);
    + assertEquals(3, i);
          assertEquals(2500, reader.getNumberOfRows());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/resources/orc-file-has-null.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
    index bef44a5..d0b25da 100644
    --- a/ql/src/test/resources/orc-file-has-null.out
    +++ b/ql/src/test/resources/orc-file-has-null.out
    @@ -29,35 +29,35 @@ File Statistics:
        Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000

      Stripes:
    - Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163
    + Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 64
    - Stream: column 2 section ROW_INDEX start: 84 length 82
    - Stream: column 1 section DATA start: 166 length 159
    - Stream: column 1 section LENGTH start: 325 length 32
    - Stream: column 2 section PRESENT start: 357 length 13
    - Stream: column 2 section DATA start: 370 length 22
    - Stream: column 2 section LENGTH start: 392 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 398 length 9
    + Stream: column 1 section ROW_INDEX start: 20 length 60
    + Stream: column 2 section ROW_INDEX start: 80 length 77
    + Stream: column 1 section DATA start: 157 length 159
    + Stream: column 1 section LENGTH start: 316 length 15
    + Stream: column 2 section PRESENT start: 331 length 13
    + Stream: column 2 section DATA start: 344 length 18
    + Stream: column 2 section LENGTH start: 362 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 368 length 9
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[2]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
    - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488
    - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
    - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488
    - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488
    - Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 474 length 17
    - Stream: column 1 section ROW_INDEX start: 491 length 64
    - Stream: column 2 section ROW_INDEX start: 555 length 39
    - Stream: column 1 section DATA start: 594 length 159
    - Stream: column 1 section LENGTH start: 753 length 32
    - Stream: column 2 section PRESENT start: 785 length 11
    - Stream: column 2 section DATA start: 796 length 0
    - Stream: column 2 section LENGTH start: 796 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 796 length 0
    + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488
    + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488
    + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488
    + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488
    + Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 442 length 17
    + Stream: column 1 section ROW_INDEX start: 459 length 60
    + Stream: column 2 section ROW_INDEX start: 519 length 39
    + Stream: column 1 section DATA start: 558 length 159
    + Stream: column 1 section LENGTH start: 717 length 15
    + Stream: column 2 section PRESENT start: 732 length 11
    + Stream: column 2 section DATA start: 743 length 0
    + Stream: column 2 section LENGTH start: 743 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 743 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -67,34 +67,34 @@ Stripes:
            Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
    - Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149
    - Stream: column 0 section ROW_INDEX start: 860 length 17
    - Stream: column 1 section ROW_INDEX start: 877 length 64
    - Stream: column 2 section ROW_INDEX start: 941 length 68
    - Stream: column 1 section DATA start: 1009 length 159
    - Stream: column 1 section LENGTH start: 1168 length 32
    - Stream: column 2 section DATA start: 1200 length 24
    - Stream: column 2 section LENGTH start: 1224 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 1230 length 11
    + Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137
    + Stream: column 0 section ROW_INDEX start: 807 length 17
    + Stream: column 1 section ROW_INDEX start: 824 length 60
    + Stream: column 2 section ROW_INDEX start: 884 length 60
    + Stream: column 1 section DATA start: 944 length 159
    + Stream: column 1 section LENGTH start: 1103 length 15
    + Stream: column 2 section DATA start: 1118 length 15
    + Stream: column 2 section LENGTH start: 1133 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 1139 length 11
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[1]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
    - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
    - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
    - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
    - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
    - Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 1304 length 17
    - Stream: column 1 section ROW_INDEX start: 1321 length 64
    - Stream: column 2 section ROW_INDEX start: 1385 length 39
    - Stream: column 1 section DATA start: 1424 length 159
    - Stream: column 1 section LENGTH start: 1583 length 32
    - Stream: column 2 section PRESENT start: 1615 length 11
    - Stream: column 2 section DATA start: 1626 length 0
    - Stream: column 2 section LENGTH start: 1626 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 1626 length 0
    + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488
    + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464
    + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440
    + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416
    + Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 1210 length 17
    + Stream: column 1 section ROW_INDEX start: 1227 length 60
    + Stream: column 2 section ROW_INDEX start: 1287 length 39
    + Stream: column 1 section DATA start: 1326 length 159
    + Stream: column 1 section LENGTH start: 1485 length 15
    + Stream: column 2 section PRESENT start: 1500 length 11
    + Stream: column 2 section DATA start: 1511 length 0
    + Stream: column 2 section LENGTH start: 1511 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 1511 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -105,6 +105,6 @@ Stripes:
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0

    -File length: 1940 bytes
    +File length: 1823 bytes
      Padding length: 0 bytes
      Padding ratio: 0%

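    The orc-file-has-null.out golden file above (and the orc_file_dump.q.out output that follows) is the textual layout dump produced by Hive's ORC FileDump utility. As a hedged pointer only: the dump for an arbitrary ORC file can be regenerated by invoking that class directly, roughly as below (the wrapper class name is hypothetical; the equivalent CLI, where the build provides it, is `hive --orcfiledump <path>`).

        // Illustrative only: prints the stripe/stream/row-index layout
        // in the same format as the golden files in this diff.
        import org.apache.hadoop.hive.ql.io.orc.FileDump;

        public class DumpOrcLayout {
          public static void main(String[] args) throws Exception {
            // args[0] is assumed to be the path of the ORC file to inspect.
            FileDump.main(new String[] { args[0] });
          }
        }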
    http://git-wip-us.apache.org/repos/asf/hive/blob/4c8e47ee/ql/src/test/results/clientpositive/orc_file_dump.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    index 50d5701..c4a7d22 100644
    --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
    +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    @@ -129,7 +129,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -148,30 +148,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -192,7 +192,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33458 bytes
    +File length: 33416 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -247,7 +247,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 56
          Stream: column 1 section ROW_INDEX start: 79 length 58
    @@ -266,30 +266,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 10385 length 56
          Stream: column 8 section ROW_INDEX start: 10441 length 86
          Stream: column 8 section BLOOM_FILTER start: 10527 length 1829
    - Stream: column 9 section ROW_INDEX start: 12356 length 51
    - Stream: column 9 section BLOOM_FILTER start: 12407 length 95
    - Stream: column 10 section ROW_INDEX start: 12502 length 82
    - Stream: column 10 section BLOOM_FILTER start: 12584 length 1994
    - Stream: column 11 section ROW_INDEX start: 14578 length 47
    - Stream: column 11 section BLOOM_FILTER start: 14625 length 474
    - Stream: column 1 section PRESENT start: 15099 length 17
    - Stream: column 1 section DATA start: 15116 length 962
    - Stream: column 2 section PRESENT start: 16078 length 17
    - Stream: column 2 section DATA start: 16095 length 1441
    - Stream: column 3 section DATA start: 17536 length 1704
    - Stream: column 4 section DATA start: 19240 length 1998
    - Stream: column 5 section DATA start: 21238 length 2925
    - Stream: column 6 section DATA start: 24163 length 3323
    - Stream: column 7 section DATA start: 27486 length 137
    - Stream: column 8 section DATA start: 27623 length 1572
    - Stream: column 8 section LENGTH start: 29195 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548
    - Stream: column 9 section DATA start: 31053 length 62
    - Stream: column 9 section SECONDARY start: 31115 length 1783
    - Stream: column 10 section DATA start: 32898 length 2138
    - Stream: column 10 section SECONDARY start: 35036 length 231
    - Stream: column 11 section DATA start: 35267 length 1877
    - Stream: column 11 section LENGTH start: 37144 length 591
    + Stream: column 9 section ROW_INDEX start: 12356 length 50
    + Stream: column 9 section BLOOM_FILTER start: 12406 length 95
    + Stream: column 10 section ROW_INDEX start: 12501 length 82
    + Stream: column 10 section BLOOM_FILTER start: 12583 length 1994
    + Stream: column 11 section ROW_INDEX start: 14577 length 47
    + Stream: column 11 section BLOOM_FILTER start: 14624 length 474
    + Stream: column 1 section PRESENT start: 15098 length 17
    + Stream: column 1 section DATA start: 15115 length 962
    + Stream: column 2 section PRESENT start: 16077 length 17
    + Stream: column 2 section DATA start: 16094 length 1441
    + Stream: column 3 section DATA start: 17535 length 1704
    + Stream: column 4 section DATA start: 19239 length 1998
    + Stream: column 5 section DATA start: 21237 length 2925
    + Stream: column 6 section DATA start: 24162 length 3323
    + Stream: column 7 section DATA start: 27485 length 137
    + Stream: column 8 section DATA start: 27622 length 1572
    + Stream: column 8 section LENGTH start: 29194 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548
    + Stream: column 9 section DATA start: 31052 length 19
    + Stream: column 9 section SECONDARY start: 31071 length 1783
    + Stream: column 10 section DATA start: 32854 length 2138
    + Stream: column 10 section SECONDARY start: 34992 length 231
    + Stream: column 11 section DATA start: 35223 length 1877
    + Stream: column 11 section LENGTH start: 37100 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -310,7 +310,7 @@ Stripes:
            Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
            Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8

    -File length: 38613 bytes
    +File length: 38568 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -377,7 +377,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -396,30 +396,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -440,7 +440,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33458 bytes
    +File length: 33416 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
  • Prasanthj at Dec 3, 2015 at 6:21 am
    Repository: hive
    Updated Branches:
       refs/heads/master 8e2007e22 -> a603ed8d7


    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/results/clientpositive/orc_llap.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_llap.q.out b/ql/src/test/results/clientpositive/orc_llap.q.out
    index c9bb3c8..742c7d8 100644
    --- a/ql/src/test/results/clientpositive/orc_llap.q.out
    +++ b/ql/src/test/results/clientpositive/orc_llap.q.out
    @@ -698,17 +698,17 @@ STAGE PLANS:
                TableScan
                  alias: orc_llap
                  filterExpr: ((cint > 10) and cbigint is not null) (type: boolean)
    - Statistics: Num rows: 99583 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 98779 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: ((cint > 10) and cbigint is not null) (type: boolean)
    - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint)
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 16597 Data size: 265553 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 16463 Data size: 263408 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -768,17 +768,17 @@ STAGE PLANS:
                TableScan
                  alias: orc_llap
                  filterExpr: ((cint > 10) and cbigint is not null) (type: boolean)
    - Statistics: Num rows: 4979 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 4938 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: ((cint > 10) and cbigint is not null) (type: boolean)
    - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
    - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 830 Data size: 265609 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 823 Data size: 263411 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -838,17 +838,17 @@ STAGE PLANS:
                TableScan
                  alias: orc_llap
                  filterExpr: ((cint > 5) and (cint < 10)) (type: boolean)
    - Statistics: Num rows: 15320 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 15196 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: ((cint > 5) and (cint < 10)) (type: boolean)
    - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: cstring2 (type: string)
                      outputColumnNames: _col0
    - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                      File Output Operator
                        compressed: false
    - Statistics: Num rows: 1702 Data size: 177014 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 1688 Data size: 175561 Basic stats: COMPLETE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -907,22 +907,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: orc_llap
    - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: cstring1 (type: string), cstring2 (type: string)
                    outputColumnNames: cstring1, cstring2
    - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: count()
                      keys: cstring1 (type: string), cstring2 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
    - Statistics: Num rows: 7966 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 7902 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col2 (type: bigint)
            Execution mode: vectorized
            LLAP IO: all inputs
    @@ -932,10 +932,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
    - Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
    - Statistics: Num rows: 3983 Data size: 796669 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3951 Data size: 790234 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    @@ -999,14 +999,14 @@ STAGE PLANS:
                TableScan
                  alias: o1
                  filterExpr: (cbigint is not null and csmallint is not null) (type: boolean)
    - Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: (cbigint is not null and csmallint is not null) (type: boolean)
    - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: csmallint (type: smallint), cstring1 (type: string)
                      outputColumnNames: _col0, _col2
    - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                      HashTable Sink Operator
                        keys:
                          0 _col0 (type: smallint)
    @@ -1018,14 +1018,14 @@ STAGE PLANS:
                TableScan
                  alias: o1
                  filterExpr: (cbigint is not null and csmallint is not null) (type: boolean)
    - Statistics: Num rows: 14226 Data size: 1593339 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 14111 Data size: 1580469 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: (cbigint is not null and csmallint is not null) (type: boolean)
    - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: csmallint (type: smallint), cstring2 (type: string)
                      outputColumnNames: _col0, _col2
    - Statistics: Num rows: 3557 Data size: 398390 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3528 Data size: 395145 Basic stats: COMPLETE Column stats: NONE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
    @@ -1033,14 +1033,14 @@ STAGE PLANS:
                          0 _col0 (type: smallint)
                          1 _col0 (type: smallint)
                        outputColumnNames: _col2, _col5
    - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: _col2 (type: string), _col5 (type: string)
                          outputColumnNames: _col0, _col1
    - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                          File Output Operator
                            compressed: false
    - Statistics: Num rows: 3912 Data size: 438229 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 3880 Data size: 434659 Basic stats: COMPLETE Column stats: NONE
                            table:
                                input format: org.apache.hadoop.mapred.TextInputFormat
                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/results/clientpositive/orc_merge11.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out
    index da608db..f0769d4 100644
    --- a/ql/src/test/results/clientpositive/orc_merge11.q.out
    +++ b/ql/src/test/results/clientpositive/orc_merge11.q.out
    @@ -96,22 +96,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -120,12 +120,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6849 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -155,22 +155,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -179,12 +179,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6849 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -244,22 +244,22 @@ File Statistics:
        Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -268,26 +268,26 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
    - Stream: column 0 section ROW_INDEX start: 10733 length 17
    - Stream: column 1 section ROW_INDEX start: 10750 length 85
    - Stream: column 2 section ROW_INDEX start: 10835 length 87
    - Stream: column 3 section ROW_INDEX start: 10922 length 111
    - Stream: column 4 section ROW_INDEX start: 11033 length 108
    - Stream: column 5 section ROW_INDEX start: 11141 length 101
    - Stream: column 1 section DATA start: 11242 length 871
    - Stream: column 2 section DATA start: 12113 length 362
    - Stream: column 2 section LENGTH start: 12475 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
    - Stream: column 3 section DATA start: 12506 length 5167
    - Stream: column 4 section DATA start: 17673 length 524
    - Stream: column 4 section SECONDARY start: 18197 length 118
    - Stream: column 5 section DATA start: 18315 length 2913
    - Stream: column 5 section SECONDARY start: 21228 length 118
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
    + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
    + Stream: column 0 section ROW_INDEX start: 6511 length 17
    + Stream: column 1 section ROW_INDEX start: 6528 length 83
    + Stream: column 2 section ROW_INDEX start: 6611 length 81
    + Stream: column 3 section ROW_INDEX start: 6692 length 111
    + Stream: column 4 section ROW_INDEX start: 6803 length 110
    + Stream: column 5 section ROW_INDEX start: 6913 length 96
    + Stream: column 1 section DATA start: 7009 length 45
    + Stream: column 2 section DATA start: 7054 length 41
    + Stream: column 2 section LENGTH start: 7095 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
    + Stream: column 3 section DATA start: 7126 length 5167
    + Stream: column 4 section DATA start: 12293 length 524
    + Stream: column 4 section SECONDARY start: 12817 length 18
    + Stream: column 5 section DATA start: 12835 length 53
    + Stream: column 5 section SECONDARY start: 12888 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -296,12 +296,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 21814 bytes
    +File length: 13369 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
    index 2b13dc6..1402a3f 100644
    --- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
    +++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
    @@ -813,11 +813,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int)
                          outputColumnNames: _col0
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Map Join Operator
                            condition map:
                                 Left Outer Join0 to 1
    @@ -826,7 +826,7 @@ STAGE PLANS:
                              1 _col0 (type: int)
                            input vertices:
                              1 Map 3
    - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
                              aggregations: count()
                              mode: hash
    @@ -934,11 +934,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int), cmodint (type: int)
                          outputColumnNames: _col0, _col1
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Map Join Operator
                            condition map:
                                 Left Outer Join0 to 1
    @@ -950,7 +950,7 @@ STAGE PLANS:
                              1 _col0 (type: int)
                            input vertices:
                              1 Map 3
    - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
                              aggregations: count()
                              mode: hash
    @@ -1058,11 +1058,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int), cmodint (type: int)
                          outputColumnNames: _col0, _col1
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Map Join Operator
                            condition map:
                                 Left Outer Join0 to 1
    @@ -1074,7 +1074,7 @@ STAGE PLANS:
                              1 _col0 (type: int)
                            input vertices:
                              1 Map 3
    - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
                              aggregations: count()
                              mode: hash
    @@ -1182,11 +1182,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int)
                          outputColumnNames: _col0
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Map Join Operator
                            condition map:
                                 Left Outer Join0 to 1
    @@ -1198,7 +1198,7 @@ STAGE PLANS:
                              1 _col0 (type: int)
                            input vertices:
                              1 Map 3
    - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                            Group By Operator
                              aggregations: count()
                              mode: hash
    @@ -1300,11 +1300,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int)
                          outputColumnNames: _col0
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Spark HashTable Sink Operator
                            keys:
                              0 _col0 (type: int)
    @@ -1323,11 +1323,11 @@ STAGE PLANS:
                  Map Operator Tree:
                      TableScan
                        alias: s
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                        Select Operator
                          expressions: cmodtinyint (type: int), cmodint (type: int)
                          outputColumnNames: _col0, _col1
    - Statistics: Num rows: 6058 Data size: 2793 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6058 Data size: 2757 Basic stats: COMPLETE Column stats: NONE
                          Map Join Operator
                            condition map:
                                 Left Outer Join0 to 1
    @@ -1337,7 +1337,7 @@ STAGE PLANS:
                            outputColumnNames: _col0
                            input vertices:
                              1 Map 3
    - Statistics: Num rows: 6663 Data size: 3072 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 6663 Data size: 3032 Basic stats: COMPLETE Column stats: NONE
                            Map Join Operator
                              condition map:
                                   Left Outer Join0 to 1
    @@ -1346,7 +1346,7 @@ STAGE PLANS:
                                1 _col0 (type: int)
                              input vertices:
                                1 Map 4
    - Statistics: Num rows: 7329 Data size: 3379 Basic stats: COMPLETE Column stats: NONE
    + Statistics: Num rows: 7329 Data size: 3335 Basic stats: COMPLETE Column stats: NONE
                              Group By Operator
                                aggregations: count()
                                mode: hash

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    index da608db..f0769d4 100644
    --- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    +++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    @@ -96,22 +96,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -120,12 +120,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6849 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -155,22 +155,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -179,12 +179,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6849 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -244,22 +244,22 @@ File Statistics:
        Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -268,26 +268,26 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
    - Stream: column 0 section ROW_INDEX start: 10733 length 17
    - Stream: column 1 section ROW_INDEX start: 10750 length 85
    - Stream: column 2 section ROW_INDEX start: 10835 length 87
    - Stream: column 3 section ROW_INDEX start: 10922 length 111
    - Stream: column 4 section ROW_INDEX start: 11033 length 108
    - Stream: column 5 section ROW_INDEX start: 11141 length 101
    - Stream: column 1 section DATA start: 11242 length 871
    - Stream: column 2 section DATA start: 12113 length 362
    - Stream: column 2 section LENGTH start: 12475 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
    - Stream: column 3 section DATA start: 12506 length 5167
    - Stream: column 4 section DATA start: 17673 length 524
    - Stream: column 4 section SECONDARY start: 18197 length 118
    - Stream: column 5 section DATA start: 18315 length 2913
    - Stream: column 5 section SECONDARY start: 21228 length 118
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
    + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
    + Stream: column 0 section ROW_INDEX start: 6511 length 17
    + Stream: column 1 section ROW_INDEX start: 6528 length 83
    + Stream: column 2 section ROW_INDEX start: 6611 length 81
    + Stream: column 3 section ROW_INDEX start: 6692 length 111
    + Stream: column 4 section ROW_INDEX start: 6803 length 110
    + Stream: column 5 section ROW_INDEX start: 6913 length 96
    + Stream: column 1 section DATA start: 7009 length 45
    + Stream: column 2 section DATA start: 7054 length 41
    + Stream: column 2 section LENGTH start: 7095 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
    + Stream: column 3 section DATA start: 7126 length 5167
    + Stream: column 4 section DATA start: 12293 length 524
    + Stream: column 4 section SECONDARY start: 12817 length 18
    + Stream: column 5 section DATA start: 12835 length 53
    + Stream: column 5 section SECONDARY start: 12888 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -296,12 +296,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 21814 bytes
    +File length: 13369 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
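
    The regenerated dumps above show the practical effect of the RLEv2 fix: streams that carry long
    constant or fixed-delta runs shrink sharply (column 1 DATA drops from 871 to 45 bytes, column 5
    SECONDARY from 118 to 18), per-stripe data falls from 10104 to 5897 bytes, and the file from
    21814 to 13369 bytes. A rough back-of-the-envelope sketch of why, assuming the RLEv2 DELTA layout
    described in the ORC spec (2 header bytes with a 9-bit run length of up to 512 values, a
    zigzag-encoded varint base value, a zigzag-encoded varint delta base, and no delta blob for a
    fixed-delta run); the helper names below are illustrative plain Java, not Hive code:

        static int varintLength(long unsignedValue) {
          // protobuf-style base-128 varint: 7 payload bits per byte
          int len = 1;
          while ((unsignedValue & ~0x7fL) != 0) { unsignedValue >>>= 7; len++; }
          return len;
        }

        static int fixedDeltaRunBytes(long baseValue) {
          long zzBase  = (baseValue << 1) ^ (baseValue >> 63); // zigzag-encode the base
          long zzDelta = 0;                                    // constant run => delta base is 0
          return 2 /* header */ + varintLength(zzBase) + varintLength(zzDelta);
        }

        // A run of up to 512 equal values near 100 costs about 2 + 2 + 1 = 5 bytes this way,
        // versus roughly a byte per value when such a run falls back to a per-value encoding
        // such as DIRECT, which is what constant runs longer than 10 values (too long for
        // SHORT_REPEAT) did before this fix.
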
  • Prasanthj at Dec 3, 2015 at 6:21 am
    HIVE-12537: RLEv2 doesn't seem to work (Prasanth Jayachandran reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a603ed8d
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a603ed8d
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a603ed8d

    Branch: refs/heads/master
    Commit: a603ed8d73e7f2bf4588a67f1b40709cc54fcbfe
    Parents: 8e2007e
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 3 00:20:13 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 3 00:20:13 2015 -0600

    ----------------------------------------------------------------------
      .../ql/io/orc/RunLengthIntegerWriterV2.java | 43 +-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 4 +-
      .../ql/io/orc/TestOrcRawRecordMerger.java.orig | 1150 ------------------
      .../apache/hadoop/hive/ql/io/orc/TestRLEv2.java | 297 +++++
      .../hive/ql/io/orc/TestVectorOrcFile.java | 4 +-
      ql/src/test/resources/orc-file-has-null.out | 94 +-
      .../results/clientpositive/orc_file_dump.q.out | 156 +--
      .../test/results/clientpositive/orc_llap.q.out | 54 +-
      .../results/clientpositive/orc_merge11.q.out | 160 +--
      .../spark/vector_outer_join5.q.out | 36 +-
      .../clientpositive/tez/orc_merge11.q.out | 160 +--
      11 files changed, 652 insertions(+), 1506 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    index 6344a66..95f8cc8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    @@ -118,8 +118,8 @@ import java.io.IOException;
       * <li>8 bits for lower run length bits</li>
       * </ul>
       * </li>
    - * <li>Base value - encoded as varint</li>
    - * <li>Delta base - encoded as varint</li>
    + * <li>Base value - zigzag encoded value written as varint</li>
    + * <li>Delta base - zigzag encoded value written as varint</li>
       * <li>Delta blob - only positive values. monotonicity and orderness are decided
       * based on the sign of the base value and delta base</li>
       * </ul>
    @@ -472,29 +472,28 @@ class RunLengthIntegerWriterV2 implements IntegerWriter {
          // invariant - subtracting any number from any other in the literals after
          // this point won't overflow

    + // if min is equal to max then the delta is 0, this condition happens for
    + // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    + if (min == max) {
    + assert isFixedDelta : min + "==" + max +
    + ", isFixedDelta cannot be false";
    + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    + fixedDelta = 0;
    + encoding = EncodingType.DELTA;
    + return;
    + }
    +
    + if (isFixedDelta) {
    + assert currDelta == initialDelta
    + : "currDelta should be equal to initialDelta for fixed delta encoding";
    + encoding = EncodingType.DELTA;
    + fixedDelta = currDelta;
    + return;
    + }
    +
          // if initialDelta is 0 then we cannot delta encode as we cannot identify
          // the sign of deltas (increasing or decreasing)
          if (initialDelta != 0) {
    -
    - // if min is equal to max then the delta is 0, this condition happens for
    - // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    - if (min == max) {
    - assert isFixedDelta : min + "==" + max +
    - ", isFixedDelta cannot be false";
    - assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    - fixedDelta = 0;
    - encoding = EncodingType.DELTA;
    - return;
    - }
    -
    - if (isFixedDelta) {
    - assert currDelta == initialDelta
    - : "currDelta should be equal to initialDelta for fixed delta encoding";
    - encoding = EncodingType.DELTA;
    - fixedDelta = currDelta;
    - return;
    - }
    -
            // stores the number of bits required for packing delta blob in
            // delta encoding
            bitsDeltaMax = utils.findClosestNumBits(deltaMax);
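
    The javadoc change above spells out that both header varints of a DELTA run (the base value and
    the delta base) are zigzag-encoded before being written, so small negative deltas still fit in one
    or two bytes. A minimal standalone sketch of that mapping (the same transform protobuf uses;
    illustrative only, not the RunLengthIntegerWriterV2 implementation):

        static long zigzagEncode(long value) {
          return (value << 1) ^ (value >> 63);    // arithmetic shift replicates the sign bit
        }

        static long zigzagDecode(long encoded) {
          return (encoded >>> 1) ^ -(encoded & 1);
        }

        // zigzagEncode(0) == 0, zigzagEncode(-1) == 1, zigzagEncode(1) == 2, zigzagEncode(-2) == 3, ...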

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    index 146f5b1..2992f3c 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    @@ -1895,9 +1895,9 @@ public class TestOrcFile {
                stripe.getDataLength() < 5000);
          }
          // with HIVE-7832, the dictionaries will be disabled after writing the first
    - // stripe as there are too many distinct values. Hence only 4 stripes as
    + // stripe as there are too many distinct values. Hence only 3 stripes as
          // compared to 25 stripes in version 0.11 (above test case)
    - assertEquals(4, i);
    + assertEquals(3, i);
          assertEquals(2500, reader.getNumberOfRows());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    deleted file mode 100644
    index 15ee24c..0000000
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java.orig
    +++ /dev/null
    @@ -1,1150 +0,0 @@
    -/**
    - * Licensed to the Apache Software Foundation (ASF) under one
    - * or more contributor license agreements. See the NOTICE file
    - * distributed with this work for additional information
    - * regarding copyright ownership. The ASF licenses this file
    - * to you under the Apache License, Version 2.0 (the
    - * "License"); you may not use this file except in compliance
    - * with the License. You may obtain a copy of the License at
    - *
    - * http://www.apache.org/licenses/LICENSE-2.0
    - *
    - * Unless required by applicable law or agreed to in writing, software
    - * distributed under the License is distributed on an "AS IS" BASIS,
    - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    - * See the License for the specific language governing permissions and
    - * limitations under the License.
    - */
    -
    -package org.apache.hadoop.hive.ql.io.orc;
    -
    -import org.slf4j.Logger;
    -import org.slf4j.LoggerFactory;
    -import org.apache.hadoop.conf.Configuration;
    -import org.apache.hadoop.fs.FileSystem;
    -import org.apache.hadoop.fs.Path;
    -import org.apache.hadoop.hive.common.ValidTxnList;
    -import org.apache.hadoop.hive.common.ValidReadTxnList;
    -import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
    -import org.apache.hadoop.hive.ql.io.AcidUtils;
    -import org.apache.hadoop.hive.ql.io.RecordIdentifier;
    -import org.apache.hadoop.hive.ql.io.RecordUpdater;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey;
    -import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair;
    -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    -import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    -import org.apache.hadoop.io.IntWritable;
    -import org.apache.hadoop.io.LongWritable;
    -import org.apache.hadoop.io.NullWritable;
    -import org.apache.hadoop.io.Text;
    -import org.apache.hadoop.mapred.InputFormat;
    -import org.apache.hadoop.mapred.InputSplit;
    -import org.apache.hadoop.mapred.JobConf;
    -import org.apache.hadoop.mapred.Reporter;
    -import org.junit.Test;
    -import org.mockito.MockSettings;
    -import org.mockito.Mockito;
    -
    -import java.io.File;
    -import java.io.IOException;
    -import java.nio.ByteBuffer;
    -import java.util.ArrayList;
    -import java.util.List;
    -
    -import static org.junit.Assert.assertEquals;
    -import static org.junit.Assert.assertTrue;
    -import static org.junit.Assert.assertFalse;
    -import static org.junit.Assert.assertNull;
    -
    -public class TestOrcRawRecordMerger {
    -
    - private static final Logger LOG = LoggerFactory.getLogger(TestOrcRawRecordMerger.class);
    -//todo: why is statementId -1?
    - @Test
    - public void testOrdering() throws Exception {
    - ReaderKey left = new ReaderKey(100, 200, 1200, 300);
    - ReaderKey right = new ReaderKey();
    - right.setValues(100, 200, 1000, 200,1);
    - assertTrue(right.compareTo(left) < 0);
    - assertTrue(left.compareTo(right) > 0);
    - assertEquals(false, left.equals(right));
    - left.set(right);
    - assertTrue(right.compareTo(left) == 0);
    - assertEquals(true, right.equals(left));
    - right.setRowId(2000);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 4,-1);
    - right.setValues(100, 2, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 4,-1);
    - right.setValues(1, 100, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    - left.setValues(1, 2, 3, 100,-1);
    - right.setValues(1, 2, 3, 4,-1);
    - assertTrue(left.compareTo(right) < 0);
    - assertTrue(right.compareTo(left) > 0);
    -
    - // ensure that we are consistent when comparing to the base class
    - RecordIdentifier ri = new RecordIdentifier(1, 2, 3);
    - assertEquals(1, ri.compareTo(left));
    - assertEquals(-1, left.compareTo(ri));
    - assertEquals(false, ri.equals(left));
    - assertEquals(false, left.equals(ri));
    - }
    -
    - private static void setRow(OrcStruct event,
    - int operation,
    - long originalTransaction,
    - int bucket,
    - long rowId,
    - long currentTransaction,
    - String value) {
    - event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(operation));
    - event.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION,
    - new LongWritable(originalTransaction));
    - event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket));
    - event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId));
    - event.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION,
    - new LongWritable(currentTransaction));
    - OrcStruct row = new OrcStruct(1);
    - row.setFieldValue(0, new Text(value));
    - event.setFieldValue(OrcRecordUpdater.ROW, row);
    - }
    -
    - private static String value(OrcStruct event) {
    - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString();
    - }
    -
    - private List<StripeInformation> createStripes(long... rowCounts) {
    - long offset = 0;
    - List<StripeInformation> result =
    - new ArrayList<StripeInformation>(rowCounts.length);
    - for(long count: rowCounts) {
    - OrcProto.StripeInformation.Builder stripe =
    - OrcProto.StripeInformation.newBuilder();
    - stripe.setDataLength(800).setIndexLength(100).setFooterLength(100)
    - .setNumberOfRows(count).setOffset(offset);
    - offset += 1000;
    - result.add(new ReaderImpl.StripeInformationImpl(stripe.build()));
    - }
    - return result;
    - }
    -
    - // can add .verboseLogging() to cause Mockito to log invocations
    - private final MockSettings settings = Mockito.withSettings();
    - private final Path tmpDir = new Path(System.getProperty("test.tmp.dir",
    - "target" + File.separator + "test" + File.separator + "tmp"));
    -
    - private Reader createMockReader() throws IOException {
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
    - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
    - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
    - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
    - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    -
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    -
    - Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
    -
    - Mockito.when(recordReader.next(null)).thenReturn(row1);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row4);
    - Mockito.when(recordReader.next(row4)).thenReturn(row5);
    -
    - return reader;
    - }
    -
    - @Test
    - public void testReaderPair() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockReader();
    - RecordIdentifier minKey = new RecordIdentifier(10, 20, 30);
    - RecordIdentifier maxKey = new RecordIdentifier(40, 50, 60);
    - ReaderPair pair = new ReaderPair(key, reader, 20, minKey, maxKey,
    - new Reader.Options(), 0);
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(40, key.getRowId());
    - assertEquals(120, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(60, key.getRowId());
    - assertEquals(130, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - @Test
    - public void testReaderPairNoMin() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockReader();
    -
    - ReaderPair pair = new ReaderPair(key, reader, 20, null, null,
    - new Reader.Options(), 0);
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(20, key.getRowId());
    - assertEquals(100, key.getCurrentTransactionId());
    - assertEquals("first", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(30, key.getRowId());
    - assertEquals(110, key.getCurrentTransactionId());
    - assertEquals("second", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(10, key.getTransactionId());
    - assertEquals(20, key.getBucketId());
    - assertEquals(40, key.getRowId());
    - assertEquals(120, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(60, key.getRowId());
    - assertEquals(130, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(40, key.getTransactionId());
    - assertEquals(50, key.getBucketId());
    - assertEquals(61, key.getRowId());
    - assertEquals(140, key.getCurrentTransactionId());
    - assertEquals("fifth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - private static OrcStruct createOriginalRow(String value) {
    - OrcStruct result = new OrcStruct(1);
    - result.setFieldValue(0, new Text(value));
    - return result;
    - }
    -
    - private Reader createMockOriginalReader() throws IOException {
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    - OrcStruct row1 = createOriginalRow("first");
    - OrcStruct row2 = createOriginalRow("second");
    - OrcStruct row3 = createOriginalRow("third");
    - OrcStruct row4 = createOriginalRow("fourth");
    - OrcStruct row5 = createOriginalRow("fifth");
    -
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    - Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L);
    - Mockito.when(recordReader.next(null)).thenReturn(row1);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row4);
    - Mockito.when(recordReader.next(row4)).thenReturn(row5);
    - return reader;
    - }
    -
    - @Test
    - public void testOriginalReaderPair() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockOriginalReader();
    - RecordIdentifier minKey = new RecordIdentifier(0, 10, 1);
    - RecordIdentifier maxKey = new RecordIdentifier(0, 10, 3);
    - boolean[] includes = new boolean[]{true, true};
    - ReaderPair pair = new OriginalReaderPair(key, reader, 10, minKey, maxKey,
    - new Reader.Options().include(includes));
    - RecordReader recordReader = pair.recordReader;
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(2, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    - assertEquals("third", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(3, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    - assertEquals("fourth", value(pair.nextRecord));
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(recordReader).close();
    - }
    -
    - private static ValidTxnList createMaximalTxnList() {
    - return new ValidReadTxnList(Long.MAX_VALUE + ":");
    - }
    -
    - @Test
    - public void testOriginalReaderPairNoMin() throws Exception {
    - ReaderKey key = new ReaderKey();
    - Reader reader = createMockOriginalReader();
    - ReaderPair pair = new OriginalReaderPair(key, reader, 10, null, null,
    - new Reader.Options());
    - assertEquals("first", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(0, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("second", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(1, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("third", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(2, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("fourth", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(3, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals("fifth", value(pair.nextRecord));
    - assertEquals(0, key.getTransactionId());
    - assertEquals(10, key.getBucketId());
    - assertEquals(4, key.getRowId());
    - assertEquals(0, key.getCurrentTransactionId());
    -
    - pair.next(pair.nextRecord);
    - assertEquals(null, pair.nextRecord);
    - Mockito.verify(pair.recordReader).close();
    - }
    -
    - @Test
    - public void testNewBase() throws Exception {
    - Configuration conf = new Configuration();
    - conf.set("columns", "col1");
    - conf.set("columns.types", "string");
    - Reader reader = Mockito.mock(Reader.class, settings);
    - RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
    -
    - List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    - OrcProto.Type.Builder typeBuilder = OrcProto.Type.newBuilder();
    - typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1)
    - .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5)
    - .addSubtypes(6);
    - types.add(typeBuilder.build());
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - types.add(null);
    - typeBuilder.clearSubtypes();
    - typeBuilder.addSubtypes(7);
    - types.add(typeBuilder.build());
    -
    - Mockito.when(reader.getTypes()).thenReturn(types);
    - Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class)))
    - .thenReturn(recordReader);
    -
    - OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
    - OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
    - OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
    - OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
    - OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
    - setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
    -
    - Mockito.when(recordReader.hasNext()).
    - thenReturn(true, true, true, true, true, false);
    -
    - Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
    -
    - Mockito.when(recordReader.next(null)).thenReturn(row1, row4);
    - Mockito.when(recordReader.next(row1)).thenReturn(row2);
    - Mockito.when(recordReader.next(row2)).thenReturn(row3);
    - Mockito.when(recordReader.next(row3)).thenReturn(row5);
    -
    - Mockito.when(reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME))
    - .thenReturn(ByteBuffer.wrap("10,20,30;40,50,60;40,50,61"
    - .getBytes("UTF-8")));
    - Mockito.when(reader.getStripes())
    - .thenReturn(createStripes(2, 2, 1));
    -
    - OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, false, reader,
    - false, 10, createMaximalTxnList(),
    - new Reader.Options().range(1000, 1000), null);
    - RecordReader rr = merger.getCurrentReader().recordReader;
    - assertEquals(0, merger.getOtherReaders().size());
    -
    - assertEquals(new RecordIdentifier(10, 20, 30), merger.getMinKey());
    - assertEquals(new RecordIdentifier(40, 50, 60), merger.getMaxKey());
    - RecordIdentifier id = merger.createKey();
    - OrcStruct event = merger.createValue();
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(10, id.getTransactionId());
    - assertEquals(20, id.getBucketId());
    - assertEquals(40, id.getRowId());
    - assertEquals("third", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(40, id.getTransactionId());
    - assertEquals(50, id.getBucketId());
    - assertEquals(60, id.getRowId());
    - assertEquals("fourth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - assertEquals(1.0, merger.getProgress(), 0.01);
    - merger.close();
    - Mockito.verify(rr).close();
    - Mockito.verify(rr).getProgress();
    -
    - StructObjectInspector eventObjectInspector =
    - (StructObjectInspector) merger.getObjectInspector();
    - List<? extends StructField> fields =
    - eventObjectInspector.getAllStructFieldRefs();
    - assertEquals(OrcRecordUpdater.FIELDS, fields.size());
    - assertEquals("operation",
    - fields.get(OrcRecordUpdater.OPERATION).getFieldName());
    - assertEquals("currentTransaction",
    - fields.get(OrcRecordUpdater.CURRENT_TRANSACTION).getFieldName());
    - assertEquals("originalTransaction",
    - fields.get(OrcRecordUpdater.ORIGINAL_TRANSACTION).getFieldName());
    - assertEquals("bucket",
    - fields.get(OrcRecordUpdater.BUCKET).getFieldName());
    - assertEquals("rowId",
    - fields.get(OrcRecordUpdater.ROW_ID).getFieldName());
    - StructObjectInspector rowObjectInspector =
    - (StructObjectInspector) fields.get(OrcRecordUpdater.ROW)
    - .getFieldObjectInspector();
    - assertEquals("col1",
    - rowObjectInspector.getAllStructFieldRefs().get(0).getFieldName());
    - }
    -
    - static class MyRow {
    - Text col1;
    - RecordIdentifier ROW__ID;
    -
    - MyRow(String val) {
    - col1 = new Text(val);
    - }
    -
    - MyRow(String val, long rowId, long origTxn, int bucket) {
    - col1 = new Text(val);
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    - }
    -
    - static String getValue(OrcStruct event) {
    - return OrcRecordUpdater.getRow(event).getFieldValue(0).toString();
    - }
    -
    - @Test
    - public void testEmpty() throws Exception {
    - final int BUCKET = 0;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the empty base
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .inspector(inspector).bucket(BUCKET).writingBase(true)
    - .maximumTransactionId(100).finalDestination(root);
    - of.getRecordUpdater(root, options).close(false);
    -
    - ValidTxnList txnList = new ValidReadTxnList("200:");
    - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
    -
    - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(),
    - BUCKET);
    - Reader baseReader = OrcFile.createReader(basePath,
    - OrcFile.readerOptions(conf));
    - OrcRawRecordMerger merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - RecordIdentifier key = merger.createKey();
    - OrcStruct value = merger.createValue();
    - assertEquals(false, merger.next(key, value));
    - }
    -
    - /**
    - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is
    - * a base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testNewBaseAndDelta() throws Exception {
    - testNewBaseAndDelta(false);
    - testNewBaseAndDelta(true);
    - }
    - private void testNewBaseAndDelta(boolean use130Format) throws Exception {
    - final int BUCKET = 10;
    - String[] values = new String[]{"first", "second", "third", "fourth",
    - "fifth", "sixth", "seventh", "eighth",
    - "ninth", "tenth"};
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .inspector(inspector).bucket(BUCKET).finalDestination(root);
    - if(!use130Format) {
    - options.statementId(-1);
    - }
    - RecordUpdater ru = of.getRecordUpdater(root,
    - options.writingBase(true).maximumTransactionId(100));
    - for(String v: values) {
    - ru.insert(0, new MyRow(v));
    - }
    - ru.close(false);
    -
    - // write a delta
    - ru = of.getRecordUpdater(root, options.writingBase(false)
    - .minimumTransactionId(200).maximumTransactionId(200).recordIdColumn(1));
    - ru.update(200, new MyRow("update 1", 0, 0, BUCKET));
    - ru.update(200, new MyRow("update 2", 2, 0, BUCKET));
    - ru.update(200, new MyRow("update 3", 3, 0, BUCKET));
    - ru.delete(200, new MyRow("", 7, 0, BUCKET));
    - ru.delete(200, new MyRow("", 8, 0, BUCKET));
    - ru.close(false);
    -
    - ValidTxnList txnList = new ValidReadTxnList("200:");
    - AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
    -
    - assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
    - assertEquals(new Path(root, use130Format ?
    - AcidUtils.deltaSubdir(200,200,0) : AcidUtils.deltaSubdir(200,200)),
    - directory.getCurrentDirectories().get(0).getPath());
    -
    - Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(),
    - BUCKET);
    - Reader baseReader = OrcFile.createReader(basePath,
    - OrcFile.readerOptions(conf));
    - OrcRawRecordMerger merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - assertEquals(null, merger.getMinKey());
    - assertEquals(null, merger.getMaxKey());
    - RecordIdentifier id = merger.createKey();
    - OrcStruct event = merger.createValue();
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id);
    - assertEquals("update 1", getValue(event));
    - assertFalse(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id);
    - assertEquals("second", getValue(event));
    - assertFalse(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id);
    - assertEquals("update 2", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id);
    - assertEquals("update 3", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id);
    - assertEquals("fifth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id);
    - assertEquals("sixth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id);
    - assertEquals("seventh", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    - assertTrue(merger.isDelete(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id);
    - assertEquals("tenth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    -
    - // make a merger that doesn't collapse events
    - merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET,
    - createMaximalTxnList(), new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 200), id);
    - assertEquals("update 1", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 0, 0), id);
    - assertEquals("first", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 1, 0), id);
    - assertEquals("second", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 200), id);
    - assertEquals("update 2", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 2, 0), id);
    - assertEquals("third", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.UPDATE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 200), id);
    - assertEquals("update 3", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 3, 0), id);
    - assertEquals("fourth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 4, 0), id);
    - assertEquals("fifth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 5, 0), id);
    - assertEquals("sixth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 6, 0), id);
    - assertEquals("seventh", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 7, 0), id);
    - assertEquals("eighth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.DELETE_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 200), id);
    - assertNull(OrcRecordUpdater.getRow(event));
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 8, 0), id);
    - assertEquals("ninth", getValue(event));
    -
    - assertEquals(true, merger.next(id, event));
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, 9, 0), id);
    - assertEquals("tenth", getValue(event));
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    -
    - // try ignoring the 200 transaction and make sure it works still
    - ValidTxnList txns = new ValidReadTxnList("2000:200");
    - merger =
    - new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET,
    - txns, new Reader.Options(),
    - AcidUtils.getPaths(directory.getCurrentDirectories()));
    - for(int i=0; i < values.length; ++i) {
    - assertEquals(true, merger.next(id, event));
    - LOG.info("id = " + id + "event = " + event);
    - assertEquals(OrcRecordUpdater.INSERT_OPERATION,
    - OrcRecordUpdater.getOperation(event));
    - assertEquals(new ReaderKey(0, BUCKET, i, 0), id);
    - assertEquals(values[i], getValue(event));
    - }
    -
    - assertEquals(false, merger.next(id, event));
    - merger.close();
    - }
    -
    - static class BigRow {
    - int myint;
    - long mylong;
    - Text mytext;
    - float myfloat;
    - double mydouble;
    - RecordIdentifier ROW__ID;
    -
    - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble) {
    - this.myint = myint;
    - this.mylong = mylong;
    - this.mytext = new Text(mytext);
    - this.myfloat = myfloat;
    - this.mydouble = mydouble;
    - ROW__ID = null;
    - }
    -
    - BigRow(int myint, long mylong, String mytext, float myfloat, double mydouble,
    - long rowId, long origTxn, int bucket) {
    - this.myint = myint;
    - this.mylong = mylong;
    - this.mytext = new Text(mytext);
    - this.myfloat = myfloat;
    - this.mydouble = mydouble;
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    -
    - BigRow(long rowId, long origTxn, int bucket) {
    - ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
    - }
    - }
    -
    - /**
    - * Test the OrcRecordUpdater with the OrcRawRecordMerger when there is
    - * a base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderOldBaseAndDelta() throws Exception {
    - final int BUCKET = 10;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testOldBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - MemoryManager mgr = new MemoryManager(conf){
    - int rowsAddedSinceCheck = 0;
    -
    - @Override
    - synchronized void addedRow(int rows) throws IOException {
    - rowsAddedSinceCheck += rows;
    - if (rowsAddedSinceCheck >= 2) {
    - notifyWriters();
    - rowsAddedSinceCheck = 0;
    - }
    - }
    - };
    - // make 5 stripes with 2 rows each
    - Writer writer = OrcFile.createWriter(new Path(root, "0000010_0"),
    - OrcFile.writerOptions(conf).inspector(inspector).fileSystem(fs)
    - .blockPadding(false).bufferSize(10000).compress(CompressionKind.NONE)
    - .stripeSize(1).memory(mgr).version(OrcFile.Version.V_0_11));
    - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
    - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"};
    - for(int i=0; i < values.length; ++i) {
    - writer.addRow(new BigRow(i, i, values[i], i, i));
    - }
    - writer.close();
    -
    - // write a delta
    - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
    - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs).recordIdColumn(5).finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - values = new String[]{"0.0", null, null, "1.1", null, null, null,
    - "ignore.7"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(9, 0, BUCKET));
    - ru.close(false);
    -
    - // write a delta
    - options = options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(8, 0, BUCKET));
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(5, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    -
    - // loop through the 5 splits and read each
    - for(int i=0; i < 4; ++i) {
    - System.out.println("starting split " + i);
    - rr = inf.getRecordReader(splits[i], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    -
    - // there should be exactly two rows per a split
    - for(int j=0; j < 2; ++j) {
    - System.out.println("i = " + i + ", j = " + j);
    - assertEquals(true, rr.next(key, value));
    - System.out.println("record = " + value);
    - assertEquals(i + "." + j, value.getFieldValue(2).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    - rr = inf.getRecordReader(splits[4], job, Reporter.NULL);
    - assertEquals(false, rr.next(rr.createKey(), rr.createValue()));
    - }
    -
    - /**
    - * Test the RecordReader when there is a new base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderNewBaseAndDelta() throws Exception {
    - final int BUCKET = 11;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testRecordReaderNewBaseAndDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write the base
    - MemoryManager mgr = new MemoryManager(conf){
    - int rowsAddedSinceCheck = 0;
    -
    - @Override
    - synchronized void addedRow(int rows) throws IOException {
    - rowsAddedSinceCheck += rows;
    - if (rowsAddedSinceCheck >= 2) {
    - notifyWriters();
    - rowsAddedSinceCheck = 0;
    - }
    - }
    - };
    -
    - // make 5 stripes with 2 rows each
    - OrcRecordUpdater.OrcOptions options = (OrcRecordUpdater.OrcOptions)
    - new OrcRecordUpdater.OrcOptions(conf)
    - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs);
    - options.orcOptions(OrcFile.writerOptions(conf)
    - .stripeSize(1).blockPadding(false).compress(CompressionKind.NONE)
    - .memory(mgr));
    - options.finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values= new String[]{"ignore.1", "0.1", "ignore.2", "ignore.3",
    - "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(0, new BigRow(i, i, values[i], i, i));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.writingBase(false).minimumTransactionId(1).maximumTransactionId(1).recordIdColumn(5);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"0.0", null, null, "1.1", null, null, null,
    - "ignore.7"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(9, 0, BUCKET));
    - ru.close(false);
    -
    - // write a delta
    - options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{null, null, "1.0", null, null, null, null, "3.1"};
    - for(int i=0; i < values.length; ++i) {
    - if (values[i] != null) {
    - ru.update(2, new BigRow(i, i, values[i], i, i, i, 0, BUCKET));
    - }
    - }
    - ru.delete(100, new BigRow(8, 0, BUCKET));
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(5, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    -
    - // loop through the 5 splits and read each
    - for(int i=0; i < 4; ++i) {
    - System.out.println("starting split " + i);
    - rr = inf.getRecordReader(splits[i], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    -
    - // there should be exactly two rows per a split
    - for(int j=0; j < 2; ++j) {
    - System.out.println("i = " + i + ", j = " + j);
    - assertEquals(true, rr.next(key, value));
    - System.out.println("record = " + value);
    - assertEquals(i + "." + j, value.getFieldValue(2).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    - rr = inf.getRecordReader(splits[4], job, Reporter.NULL);
    - assertEquals(false, rr.next(rr.createKey(), rr.createValue()));
    - }
    -
    - /**
    - * Test the RecordReader when there is a new base and a delta.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderDelta() throws Exception {
    - final int BUCKET = 0;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf);
    - Path root = new Path(tmpDir, "testRecordReaderDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write a delta
    - AcidOutputFormat.Options options =
    - new AcidOutputFormat.Options(conf)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs)
    - .writingBase(false).minimumTransactionId(1).maximumTransactionId(1)
    - .finalDestination(root);
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values = new String[]{"a", "b", "c", "d", "e"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(1, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.minimumTransactionId(2).maximumTransactionId(2);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"f", "g", "h", "i", "j"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(2, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.min.split.size", "1");
    - job.set("mapred.max.split.size", "2");
    - job.set("mapred.input.dir", root.toString());
    - job.set("bucket_count", "1");
    - InputSplit[] splits = inf.getSplits(job, 5);
    - assertEquals(1, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    - rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    - values = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"};
    - OrcStruct row = rr.createValue();
    - for(int i = 0; i < values.length; ++i) {
    - System.out.println("Checking " + i);
    - assertEquals(true, rr.next(NullWritable.get(), row));
    - assertEquals(values[i], row.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(NullWritable.get(), row));
    - }
    -
    - /**
    - * Test the RecordReader when the delta has been flushed, but not closed.
    - * @throws Exception
    - */
    - @Test
    - public void testRecordReaderIncompleteDelta() throws Exception {
    - testRecordReaderIncompleteDelta(false);
    - testRecordReaderIncompleteDelta(true);
    - }
    - /**
    - *
    - * @param use130Format true means use delta_0001_0001_0000 format, else delta_0001_00001
    - */
    - private void testRecordReaderIncompleteDelta(boolean use130Format) throws Exception {
    - final int BUCKET = 1;
    - Configuration conf = new Configuration();
    - OrcOutputFormat of = new OrcOutputFormat();
    - FileSystem fs = FileSystem.getLocal(conf).getRaw();
    - Path root = new Path(tmpDir, "testRecordReaderIncompleteDelta").makeQualified(fs);
    - fs.delete(root, true);
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    -
    - // write a base
    - AcidOutputFormat.Options options =
    - new AcidOutputFormat.Options(conf)
    - .writingBase(true).minimumTransactionId(0).maximumTransactionId(0)
    - .bucket(BUCKET).inspector(inspector).filesystem(fs).finalDestination(root);
    - if(!use130Format) {
    - options.statementId(-1);
    - }
    - RecordUpdater ru = of.getRecordUpdater(root, options);
    - String[] values= new String[]{"1", "2", "3", "4", "5"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(0, new MyRow(values[i]));
    - }
    - ru.close(false);
    -
    - // write a delta
    - options.writingBase(false).minimumTransactionId(10)
    - .maximumTransactionId(19);
    - ru = of.getRecordUpdater(root, options);
    - values = new String[]{"6", "7", "8"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(1, new MyRow(values[i]));
    - }
    - InputFormat inf = new OrcInputFormat();
    - JobConf job = new JobConf();
    - job.set("mapred.input.dir", root.toString());
    - job.set("bucket_count", "2");
    -
    - // read the keys before the delta is flushed
    - InputSplit[] splits = inf.getSplits(job, 1);
    - assertEquals(2, splits.length);
    - org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr =
    - inf.getRecordReader(splits[0], job, Reporter.NULL);
    - NullWritable key = rr.createKey();
    - OrcStruct value = rr.createValue();
    - System.out.println("Looking at split " + splits[0]);
    - for(int i=1; i < 6; ++i) {
    - System.out.println("Checking row " + i);
    - assertEquals(true, rr.next(key, value));
    - assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    -
    - ru.flush();
    - ru.flush();
    - values = new String[]{"9", "10"};
    - for(int i=0; i < values.length; ++i) {
    - ru.insert(3, new MyRow(values[i]));
    - }
    - ru.flush();
    -
    - splits = inf.getSplits(job, 1);
    - assertEquals(2, splits.length);
    - rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    - Path sideFile = new Path(root + "/" + (use130Format ? AcidUtils.deltaSubdir(10,19,0) :
    - AcidUtils.deltaSubdir(10,19)) + "/bucket_00001_flush_length");
    - assertEquals(true, fs.exists(sideFile));
    - assertEquals(24, fs.getFileStatus(sideFile).getLen());
    -
    - for(int i=1; i < 11; ++i) {
    - assertEquals(true, rr.next(key, value));
    - assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    - }
    - assertEquals(false, rr.next(key, value));
    - }
    -
    -}

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    new file mode 100644
    index 0000000..1a3559e
    --- /dev/null
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    @@ -0,0 +1,297 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +import java.io.ByteArrayOutputStream;
    +import java.io.File;
    +import java.io.PrintStream;
    +import java.util.Random;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +public class TestRLEv2 {
    + Path workDir = new Path(System.getProperty("test.tmp.dir",
    + "target" + File.separator + "test" + File.separator + "tmp"));
    + Path testFilePath;
    + Configuration conf;
    + FileSystem fs;
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new Configuration();
    + fs = FileSystem.getLocal(conf);
    + testFilePath = new Path(workDir, "TestRLEv2." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + @Test
    + public void testFixedDeltaZero() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(123);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
    + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOne() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 1 byte delta (delta = 1). In total, 4 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOneDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(512 - (i % 512));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
    + // and 1 byte delta (delta = 1). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLarge() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512 + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLargeDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow((512 - i % 512) + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
    + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testShortRepeat() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5; ++i) {
    + w.addRow(10);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 1 byte header + 1 byte value
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testDeltaUnknownSign() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + w.addRow(0);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(i);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
    + // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
    + // each, 5120/8 = 640). Total bytes 642
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testPatchedBase() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + Random rand = new Random(123);
    + w.addRow(10000000);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(rand.nextInt(i+1));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // use PATCHED_BASE encoding
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
    + System.setOut(origOut);
    + }
    +}
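
    The stream lengths asserted above follow directly from the DELTA run layout described in the RLEv2 javadoc: a 2-byte header, the base value as a zigzag-encoded varint, and the fixed delta as a zigzag-encoded varint. Below is a minimal standalone sketch of that arithmetic; it is not part of the patch, the class and method names are hypothetical, and it only reproduces the per-run byte counts quoted in the test comments (for example, base 123 zigzags to 246, which needs two varint bytes, giving 5 bytes per run and 50 bytes for the 10 runs in testFixedDeltaZero).

    // Hypothetical helper, not part of the patch: reproduces the zigzag + varint
    // byte counts that the TestRLEv2 comments use to predict DATA stream sizes.
    public class RleV2SizeSketch {
      // ZigZag-encode a signed long so small magnitudes map to small unsigned values.
      static long zigzag(long v) {
        return (v << 1) ^ (v >> 63);
      }

      // Number of bytes the value occupies as a 7-bits-per-byte varint.
      static int varintBytes(long unsignedValue) {
        int n = 1;
        while (Long.compareUnsigned(unsignedValue, 0x7FL) > 0) {
          unsignedValue >>>= 7;
          ++n;
        }
        return n;
      }

      // Bytes for one fixed-delta DELTA run: 2-byte header + base + delta base.
      static int fixedDeltaRunBytes(long base, long delta) {
        return 2 + varintBytes(zigzag(base)) + varintBytes(zigzag(delta));
      }

      public static void main(String[] args) {
        // testFixedDeltaZero: base 123, delta 0 -> 5 bytes/run, 10 runs -> 50
        System.out.println(10 * fixedDeltaRunBytes(123, 0));
        // testFixedDeltaOne: base 0, delta 1 -> 4 bytes/run -> 40
        System.out.println(10 * fixedDeltaRunBytes(0, 1));
        // testFixedDeltaOneDescending: base 512, delta -1 -> 5 bytes/run -> 50
        System.out.println(10 * fixedDeltaRunBytes(512, -1));
      }
    }

    Running the sketch prints 50, 40 and 50, matching the lengths asserted in testFixedDeltaZero, testFixedDeltaOne and testFixedDeltaOneDescending.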

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    index 134f78c..070e2ab 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java
    @@ -1984,9 +1984,9 @@ public class TestVectorOrcFile {
                stripe.getDataLength() < 5000);
          }
          // with HIVE-7832, the dictionaries will be disabled after writing the first
    - // stripe as there are too many distinct values. Hence only 4 stripes as
    + // stripe as there are too many distinct values. Hence only 3 stripes as
          // compared to 25 stripes in version 0.11 (above test case)
    - assertEquals(4, i);
    + assertEquals(3, i);
          assertEquals(2500, reader.getNumberOfRows());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/resources/orc-file-has-null.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
    index bef44a5..d0b25da 100644
    --- a/ql/src/test/resources/orc-file-has-null.out
    +++ b/ql/src/test/resources/orc-file-has-null.out
    @@ -29,35 +29,35 @@ File Statistics:
        Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000

      Stripes:
    - Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163
    + Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 64
    - Stream: column 2 section ROW_INDEX start: 84 length 82
    - Stream: column 1 section DATA start: 166 length 159
    - Stream: column 1 section LENGTH start: 325 length 32
    - Stream: column 2 section PRESENT start: 357 length 13
    - Stream: column 2 section DATA start: 370 length 22
    - Stream: column 2 section LENGTH start: 392 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 398 length 9
    + Stream: column 1 section ROW_INDEX start: 20 length 60
    + Stream: column 2 section ROW_INDEX start: 80 length 77
    + Stream: column 1 section DATA start: 157 length 159
    + Stream: column 1 section LENGTH start: 316 length 15
    + Stream: column 2 section PRESENT start: 331 length 13
    + Stream: column 2 section DATA start: 344 length 18
    + Stream: column 2 section LENGTH start: 362 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 368 length 9
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[2]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
    - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488
    - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
    - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488
    - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488
    - Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 474 length 17
    - Stream: column 1 section ROW_INDEX start: 491 length 64
    - Stream: column 2 section ROW_INDEX start: 555 length 39
    - Stream: column 1 section DATA start: 594 length 159
    - Stream: column 1 section LENGTH start: 753 length 32
    - Stream: column 2 section PRESENT start: 785 length 11
    - Stream: column 2 section DATA start: 796 length 0
    - Stream: column 2 section LENGTH start: 796 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 796 length 0
    + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488
    + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488
    + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488
    + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488
    + Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 442 length 17
    + Stream: column 1 section ROW_INDEX start: 459 length 60
    + Stream: column 2 section ROW_INDEX start: 519 length 39
    + Stream: column 1 section DATA start: 558 length 159
    + Stream: column 1 section LENGTH start: 717 length 15
    + Stream: column 2 section PRESENT start: 732 length 11
    + Stream: column 2 section DATA start: 743 length 0
    + Stream: column 2 section LENGTH start: 743 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 743 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -67,34 +67,34 @@ Stripes:
            Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
    - Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149
    - Stream: column 0 section ROW_INDEX start: 860 length 17
    - Stream: column 1 section ROW_INDEX start: 877 length 64
    - Stream: column 2 section ROW_INDEX start: 941 length 68
    - Stream: column 1 section DATA start: 1009 length 159
    - Stream: column 1 section LENGTH start: 1168 length 32
    - Stream: column 2 section DATA start: 1200 length 24
    - Stream: column 2 section LENGTH start: 1224 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 1230 length 11
    + Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137
    + Stream: column 0 section ROW_INDEX start: 807 length 17
    + Stream: column 1 section ROW_INDEX start: 824 length 60
    + Stream: column 2 section ROW_INDEX start: 884 length 60
    + Stream: column 1 section DATA start: 944 length 159
    + Stream: column 1 section LENGTH start: 1103 length 15
    + Stream: column 2 section DATA start: 1118 length 15
    + Stream: column 2 section LENGTH start: 1133 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 1139 length 11
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[1]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
    - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
    - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
    - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
    - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
    - Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 1304 length 17
    - Stream: column 1 section ROW_INDEX start: 1321 length 64
    - Stream: column 2 section ROW_INDEX start: 1385 length 39
    - Stream: column 1 section DATA start: 1424 length 159
    - Stream: column 1 section LENGTH start: 1583 length 32
    - Stream: column 2 section PRESENT start: 1615 length 11
    - Stream: column 2 section DATA start: 1626 length 0
    - Stream: column 2 section LENGTH start: 1626 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 1626 length 0
    + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488
    + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464
    + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440
    + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416
    + Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 1210 length 17
    + Stream: column 1 section ROW_INDEX start: 1227 length 60
    + Stream: column 2 section ROW_INDEX start: 1287 length 39
    + Stream: column 1 section DATA start: 1326 length 159
    + Stream: column 1 section LENGTH start: 1485 length 15
    + Stream: column 2 section PRESENT start: 1500 length 11
    + Stream: column 2 section DATA start: 1511 length 0
    + Stream: column 2 section LENGTH start: 1511 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 1511 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -105,6 +105,6 @@ Stripes:
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0

    -File length: 1940 bytes
    +File length: 1823 bytes
      Padding length: 0 bytes
      Padding ratio: 0%

    http://git-wip-us.apache.org/repos/asf/hive/blob/a603ed8d/ql/src/test/results/clientpositive/orc_file_dump.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    index 50d5701..c4a7d22 100644
    --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
    +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    @@ -129,7 +129,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -148,30 +148,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -192,7 +192,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33458 bytes
    +File length: 33416 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -247,7 +247,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 56
          Stream: column 1 section ROW_INDEX start: 79 length 58
    @@ -266,30 +266,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 10385 length 56
          Stream: column 8 section ROW_INDEX start: 10441 length 86
          Stream: column 8 section BLOOM_FILTER start: 10527 length 1829
    - Stream: column 9 section ROW_INDEX start: 12356 length 51
    - Stream: column 9 section BLOOM_FILTER start: 12407 length 95
    - Stream: column 10 section ROW_INDEX start: 12502 length 82
    - Stream: column 10 section BLOOM_FILTER start: 12584 length 1994
    - Stream: column 11 section ROW_INDEX start: 14578 length 47
    - Stream: column 11 section BLOOM_FILTER start: 14625 length 474
    - Stream: column 1 section PRESENT start: 15099 length 17
    - Stream: column 1 section DATA start: 15116 length 962
    - Stream: column 2 section PRESENT start: 16078 length 17
    - Stream: column 2 section DATA start: 16095 length 1441
    - Stream: column 3 section DATA start: 17536 length 1704
    - Stream: column 4 section DATA start: 19240 length 1998
    - Stream: column 5 section DATA start: 21238 length 2925
    - Stream: column 6 section DATA start: 24163 length 3323
    - Stream: column 7 section DATA start: 27486 length 137
    - Stream: column 8 section DATA start: 27623 length 1572
    - Stream: column 8 section LENGTH start: 29195 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548
    - Stream: column 9 section DATA start: 31053 length 62
    - Stream: column 9 section SECONDARY start: 31115 length 1783
    - Stream: column 10 section DATA start: 32898 length 2138
    - Stream: column 10 section SECONDARY start: 35036 length 231
    - Stream: column 11 section DATA start: 35267 length 1877
    - Stream: column 11 section LENGTH start: 37144 length 591
    + Stream: column 9 section ROW_INDEX start: 12356 length 50
    + Stream: column 9 section BLOOM_FILTER start: 12406 length 95
    + Stream: column 10 section ROW_INDEX start: 12501 length 82
    + Stream: column 10 section BLOOM_FILTER start: 12583 length 1994
    + Stream: column 11 section ROW_INDEX start: 14577 length 47
    + Stream: column 11 section BLOOM_FILTER start: 14624 length 474
    + Stream: column 1 section PRESENT start: 15098 length 17
    + Stream: column 1 section DATA start: 15115 length 962
    + Stream: column 2 section PRESENT start: 16077 length 17
    + Stream: column 2 section DATA start: 16094 length 1441
    + Stream: column 3 section DATA start: 17535 length 1704
    + Stream: column 4 section DATA start: 19239 length 1998
    + Stream: column 5 section DATA start: 21237 length 2925
    + Stream: column 6 section DATA start: 24162 length 3323
    + Stream: column 7 section DATA start: 27485 length 137
    + Stream: column 8 section DATA start: 27622 length 1572
    + Stream: column 8 section LENGTH start: 29194 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548
    + Stream: column 9 section DATA start: 31052 length 19
    + Stream: column 9 section SECONDARY start: 31071 length 1783
    + Stream: column 10 section DATA start: 32854 length 2138
    + Stream: column 10 section SECONDARY start: 34992 length 231
    + Stream: column 11 section DATA start: 35223 length 1877
    + Stream: column 11 section LENGTH start: 37100 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -310,7 +310,7 @@ Stripes:
            Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
            Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8

    -File length: 38613 bytes
    +File length: 38568 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -377,7 +377,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -396,30 +396,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -440,7 +440,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33458 bytes
    +File length: 33416 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
  • Prasanthj at Dec 3, 2015 at 6:58 am
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 7db94f072 -> 5628594b9


    HIVE-12537: RLEv2 doesn't seem to work (Prasanth Jayachandran reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5628594b
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5628594b
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5628594b

    Branch: refs/heads/branch-1
    Commit: 5628594b981117694b7dbd3539fa2ae34b744a27
    Parents: 7db94f0
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Thu Dec 3 00:58:27 2015 -0600
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Thu Dec 3 00:58:27 2015 -0600

    ----------------------------------------------------------------------
      .../ql/io/orc/RunLengthIntegerWriterV2.java | 43 ++-
      .../hadoop/hive/ql/io/orc/TestOrcFile.java | 4 +-
      .../apache/hadoop/hive/ql/io/orc/TestRLEv2.java | 297 +++++++++++++++++++
      ql/src/test/resources/orc-file-has-null.out | 94 +++---
      .../results/clientpositive/orc_file_dump.q.out | 156 +++++-----
      .../results/clientpositive/orc_merge11.q.out | 172 +++++------
      .../clientpositive/tez/orc_merge11.q.out | 172 +++++------
      7 files changed, 617 insertions(+), 321 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    index 6344a66..95f8cc8 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
    @@ -118,8 +118,8 @@ import java.io.IOException;
       * <li>8 bits for lower run length bits</li>
       * </ul>
       * </li>
    - * <li>Base value - encoded as varint</li>
    - * <li>Delta base - encoded as varint</li>
    + * <li>Base value - zigzag encoded value written as varint</li>
    + * <li>Delta base - zigzag encoded value written as varint</li>
       * <li>Delta blob - only positive values. monotonicity and orderness are decided
       * based on the sign of the base value and delta base</li>
       * </ul>
    @@ -472,29 +472,28 @@ class RunLengthIntegerWriterV2 implements IntegerWriter {
          // invariant - subtracting any number from any other in the literals after
          // this point won't overflow

    + // if min is equal to max then the delta is 0, this condition happens for
    + // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    + if (min == max) {
    + assert isFixedDelta : min + "==" + max +
    + ", isFixedDelta cannot be false";
    + assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    + fixedDelta = 0;
    + encoding = EncodingType.DELTA;
    + return;
    + }
    +
    + if (isFixedDelta) {
    + assert currDelta == initialDelta
    + : "currDelta should be equal to initialDelta for fixed delta encoding";
    + encoding = EncodingType.DELTA;
    + fixedDelta = currDelta;
    + return;
    + }
    +
          // if initialDelta is 0 then we cannot delta encode as we cannot identify
          // the sign of deltas (increasing or decreasing)
          if (initialDelta != 0) {
    -
    - // if min is equal to max then the delta is 0, this condition happens for
    - // fixed values run >10 which cannot be encoded with SHORT_REPEAT
    - if (min == max) {
    - assert isFixedDelta : min + "==" + max +
    - ", isFixedDelta cannot be false";
    - assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
    - fixedDelta = 0;
    - encoding = EncodingType.DELTA;
    - return;
    - }
    -
    - if (isFixedDelta) {
    - assert currDelta == initialDelta
    - : "currDelta should be equal to initialDelta for fixed delta encoding";
    - encoding = EncodingType.DELTA;
    - fixedDelta = currDelta;
    - return;
    - }
    -
            // stores the number of bits required for packing delta blob in
            // delta encoding
            bitsDeltaMax = utils.findClosestNumBits(deltaMax);
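
    The hunk above is the core of HIVE-12537: the min == max and isFixedDelta checks are hoisted out of the initialDelta != 0 guard, so constant runs and fixed-delta runs can still be DELTA-encoded even when the first delta happens to be zero; only runs with variable deltas still need a non-zero first delta to establish their direction. A minimal sketch of that check order follows, using hypothetical field and class names rather than the actual RunLengthIntegerWriterV2 internals.

    // Hypothetical sketch of the post-patch decision order; the real writer keeps
    // this state internally and chooses among more encodings than shown here.
    class DeltaDecisionSketch {
      enum Encoding { SHORT_REPEAT, DIRECT, PATCHED_BASE, DELTA }

      long min, max, initialDelta, currDelta;
      boolean isFixedDelta;
      long fixedDelta;

      Encoding choose() {
        if (min == max) {
          // constant run longer than SHORT_REPEAT can hold: fixed delta of 0
          fixedDelta = 0;
          return Encoding.DELTA;
        }
        if (isFixedDelta) {
          // every adjacent difference is the same: fixed-delta DELTA run
          fixedDelta = currDelta;
          return Encoding.DELTA;
        }
        if (initialDelta == 0) {
          // direction of the deltas is unknown (e.g. 0,0,1,2,3,...); the writer
          // falls back to DIRECT (or PATCHED_BASE) as in testDeltaUnknownSign
          return Encoding.DIRECT;
        }
        // variable deltas with a known sign: simplified here; the real writer
        // still sizes the delta blob (findClosestNumBits) before committing
        return Encoding.DELTA;
      }
    }

    The last two branches are deliberately simplified; the point of the sketch is only the ordering, namely that fixed-delta runs are recognized before the initialDelta check is consulted.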

    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    index 255565e..6620a66 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    @@ -1895,9 +1895,9 @@ public class TestOrcFile {
                stripe.getDataLength() < 5000);
          }
          // with HIVE-7832, the dictionaries will be disabled after writing the first
    - // stripe as there are too many distinct values. Hence only 4 stripes as
    + // stripe as there are too many distinct values. Hence only 3 stripes as
          // compared to 25 stripes in version 0.11 (above test case)
    - assertEquals(4, i);
    + assertEquals(3, i);
          assertEquals(2500, reader.getNumberOfRows());
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    new file mode 100644
    index 0000000..1a3559e
    --- /dev/null
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRLEv2.java
    @@ -0,0 +1,297 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +import java.io.ByteArrayOutputStream;
    +import java.io.File;
    +import java.io.PrintStream;
    +import java.util.Random;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +public class TestRLEv2 {
    + Path workDir = new Path(System.getProperty("test.tmp.dir",
    + "target" + File.separator + "test" + File.separator + "tmp"));
    + Path testFilePath;
    + Configuration conf;
    + FileSystem fs;
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new Configuration();
    + fs = FileSystem.getLocal(conf);
    + testFilePath = new Path(workDir, "TestRLEv2." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + @Test
    + public void testFixedDeltaZero() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(123);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 bytes base (base = 123,
    + // zigzag encoded varint) and 1 byte delta (delta = 0). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOne() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 1 byte delta (delta = 1). In total, 4 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 40"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaOneDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(512 - (i % 512));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
    + // and 1 byte delta (delta = 1). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLarge() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow(i % 512 + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 1 byte base (base = 0)
    + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 5 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 50"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testFixedDeltaLargeDescending() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5120; ++i) {
    + w.addRow((512 - i % 512) + ((i % 512 ) * 100));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 10 runs of 512 elements. Each run has 2 bytes header, 2 byte base (base = 512, zigzag + varint)
    + // and 2 bytes delta (delta = 100, zigzag encoded varint). In total, 6 bytes per run.
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 60"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testShortRepeat() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + for (int i = 0; i < 5; ++i) {
    + w.addRow(10);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // 1 byte header + 1 byte value
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 2"));
    + System.setOut(origOut);
    + }
    +
    + @Test
    + public void testDeltaUnknownSign() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + w.addRow(0);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(i);
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // monotonicity will be undetermined for this sequence 0,0,1,2,3,...510. Hence DIRECT encoding
    + // will be used. 2 bytes for header and 640 bytes for data (512 values with fixed bit of 10 bits
    + // each, 5120/8 = 640). Total bytes 642
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 642"));
    + System.setOut(origOut);
    + }
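The 642-byte figure can be reproduced with back-of-the-envelope arithmetic, assuming DIRECT zigzag-encodes the signed values before bit-packing them (which is why the maximum value 510 costs 10 bits rather than 9). The class below is an illustration of that arithmetic, not ORC code:

public class DirectSizeSketch {
  public static void main(String[] args) {
    int values = 512;
    long maxZigZag = (510L << 1) ^ (510L >> 63);           // 1020: largest encoded value
    int bits = 64 - Long.numberOfLeadingZeros(maxZigZag);  // 10 bits per value
    int dataBytes = values * bits / 8;                     // 5120 bits -> 640 bytes
    int header = 2;                                        // DIRECT run header
    System.out.println(header + dataBytes);                // prints 642
  }
}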
    +
    + @Test
    + public void testPatchedBase() throws Exception {
    + ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
    + Integer.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + Writer w = OrcFile.createWriter(testFilePath,
    + OrcFile.writerOptions(conf)
    + .compress(CompressionKind.NONE)
    + .inspector(inspector)
    + .rowIndexStride(0)
    + .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
    + .version(OrcFile.Version.V_0_12)
    + );
    +
    + Random rand = new Random(123);
    + w.addRow(10000000);
    + for (int i = 0; i < 511; ++i) {
    + w.addRow(rand.nextInt(i+1));
    + }
    + w.close();
    +
    + PrintStream origOut = System.out;
    + ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toUri().toString()});
    + System.out.flush();
    + String outDump = new String(myOut.toByteArray());
    + // use PATCHED_BASE encoding
    + assertEquals(true, outDump.contains("Stream: column 0 section DATA start: 3 length 583"));
    + System.setOut(origOut);
    + }
    +}
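A rough sketch of why this data pushes the writer toward PATCHED_BASE rather than DIRECT: the single 10,000,000 outlier needs a much wider bit width than the remaining values, which rand.nextInt(i + 1) keeps at or below 510, so patching the outlier lets the bulk of the run stay narrow. The widths printed below are illustrative arithmetic only, not the exact layout behind the 583-byte assertion:

public class PatchedBaseSketch {
  static int bits(long v) {
    return Math.max(1, 64 - Long.numberOfLeadingZeros(v));
  }

  public static void main(String[] args) {
    long outlier = 10_000_000L;
    long typicalMax = 510L;                  // rand.nextInt(i + 1) never exceeds 510
    System.out.println("width if the outlier sets the pace: " + bits(outlier));    // 24 bits
    System.out.println("width needed by the other values:   " + bits(typicalMax)); // 9 bits
  }
}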

    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/resources/orc-file-has-null.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
    index 44c60b7..2b12ddb 100644
    --- a/ql/src/test/resources/orc-file-has-null.out
    +++ b/ql/src/test/resources/orc-file-has-null.out
    @@ -29,35 +29,35 @@ File Statistics:
        Column 2: count: 7000 hasNull: true min: RG1 max: STRIPE-3 sum: 46000

      Stripes:
    - Stripe: offset: 3 data: 241 rows: 5000 tail: 67 index: 163
    + Stripe: offset: 3 data: 220 rows: 5000 tail: 65 index: 154
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 64
    - Stream: column 2 section ROW_INDEX start: 84 length 82
    - Stream: column 1 section DATA start: 166 length 159
    - Stream: column 1 section LENGTH start: 325 length 32
    - Stream: column 2 section PRESENT start: 357 length 13
    - Stream: column 2 section DATA start: 370 length 22
    - Stream: column 2 section LENGTH start: 392 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 398 length 9
    + Stream: column 1 section ROW_INDEX start: 20 length 60
    + Stream: column 2 section ROW_INDEX start: 80 length 77
    + Stream: column 1 section DATA start: 157 length 159
    + Stream: column 1 section LENGTH start: 316 length 15
    + Stream: column 2 section PRESENT start: 331 length 13
    + Stream: column 2 section DATA start: 344 length 18
    + Stream: column 2 section LENGTH start: 362 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 368 length 9
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[2]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: RG1 max: RG1 sum: 3000 positions: 0,0,0,0,0,0,0
    - Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,66,488
    - Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,66,488
    - Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,136,488
    - Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,136,488
    - Stripe: offset: 474 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 474 length 17
    - Stream: column 1 section ROW_INDEX start: 491 length 64
    - Stream: column 2 section ROW_INDEX start: 555 length 39
    - Stream: column 1 section DATA start: 594 length 159
    - Stream: column 1 section LENGTH start: 753 length 32
    - Stream: column 2 section PRESENT start: 785 length 11
    - Stream: column 2 section DATA start: 796 length 0
    - Stream: column 2 section LENGTH start: 796 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 796 length 0
    + Entry 1: count: 0 hasNull: true positions: 0,0,125,0,0,4,488
    + Entry 2: count: 1000 hasNull: false min: RG3 max: RG3 sum: 3000 positions: 0,2,125,0,0,4,488
    + Entry 3: count: 0 hasNull: true positions: 0,4,125,0,0,12,488
    + Entry 4: count: 0 hasNull: true positions: 0,6,125,0,0,12,488
    + Stripe: offset: 442 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 442 length 17
    + Stream: column 1 section ROW_INDEX start: 459 length 60
    + Stream: column 2 section ROW_INDEX start: 519 length 39
    + Stream: column 1 section DATA start: 558 length 159
    + Stream: column 1 section LENGTH start: 717 length 15
    + Stream: column 2 section PRESENT start: 732 length 11
    + Stream: column 2 section DATA start: 743 length 0
    + Stream: column 2 section LENGTH start: 743 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 743 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -67,34 +67,34 @@ Stripes:
            Entry 2: count: 0 hasNull: true positions: 0,2,120,0,0,0,0
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
    - Stripe: offset: 860 data: 232 rows: 5000 tail: 63 index: 149
    - Stream: column 0 section ROW_INDEX start: 860 length 17
    - Stream: column 1 section ROW_INDEX start: 877 length 64
    - Stream: column 2 section ROW_INDEX start: 941 length 68
    - Stream: column 1 section DATA start: 1009 length 159
    - Stream: column 1 section LENGTH start: 1168 length 32
    - Stream: column 2 section DATA start: 1200 length 24
    - Stream: column 2 section LENGTH start: 1224 length 6
    - Stream: column 2 section DICTIONARY_DATA start: 1230 length 11
    + Stripe: offset: 807 data: 206 rows: 5000 tail: 60 index: 137
    + Stream: column 0 section ROW_INDEX start: 807 length 17
    + Stream: column 1 section ROW_INDEX start: 824 length 60
    + Stream: column 2 section ROW_INDEX start: 884 length 60
    + Stream: column 1 section DATA start: 944 length 159
    + Stream: column 1 section LENGTH start: 1103 length 15
    + Stream: column 2 section DATA start: 1118 length 15
    + Stream: column 2 section LENGTH start: 1133 length 6
    + Stream: column 2 section DICTIONARY_DATA start: 1139 length 11
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[1]
          Row group indices for column 2:
            Entry 0: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,0,0
    - Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,66,488
    - Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,198,464
    - Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,330,440
    - Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,462,416
    - Stripe: offset: 1304 data: 202 rows: 5000 tail: 64 index: 120
    - Stream: column 0 section ROW_INDEX start: 1304 length 17
    - Stream: column 1 section ROW_INDEX start: 1321 length 64
    - Stream: column 2 section ROW_INDEX start: 1385 length 39
    - Stream: column 1 section DATA start: 1424 length 159
    - Stream: column 1 section LENGTH start: 1583 length 32
    - Stream: column 2 section PRESENT start: 1615 length 11
    - Stream: column 2 section DATA start: 1626 length 0
    - Stream: column 2 section LENGTH start: 1626 length 0
    - Stream: column 2 section DICTIONARY_DATA start: 1626 length 0
    + Entry 1: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,4,488
    + Entry 2: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,12,464
    + Entry 3: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,20,440
    + Entry 4: count: 1000 hasNull: false min: STRIPE-3 max: STRIPE-3 sum: 8000 positions: 0,28,416
    + Stripe: offset: 1210 data: 185 rows: 5000 tail: 64 index: 116
    + Stream: column 0 section ROW_INDEX start: 1210 length 17
    + Stream: column 1 section ROW_INDEX start: 1227 length 60
    + Stream: column 2 section ROW_INDEX start: 1287 length 39
    + Stream: column 1 section DATA start: 1326 length 159
    + Stream: column 1 section LENGTH start: 1485 length 15
    + Stream: column 2 section PRESENT start: 1500 length 11
    + Stream: column 2 section DATA start: 1511 length 0
    + Stream: column 2 section LENGTH start: 1511 length 0
    + Stream: column 2 section DICTIONARY_DATA start: 1511 length 0
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[0]
    @@ -105,6 +105,6 @@ Stripes:
            Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
            Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0

    -File length: 1940 bytes
    +File length: 1823 bytes
      Padding length: 0 bytes
      Padding ratio: 0%

    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    index 67aa189..7503c81 100644
    --- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
    +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
    @@ -129,7 +129,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -148,30 +148,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -192,7 +192,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33456 bytes
    +File length: 33413 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -247,7 +247,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 15095
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 56
          Stream: column 1 section ROW_INDEX start: 79 length 58
    @@ -266,30 +266,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 10385 length 56
          Stream: column 8 section ROW_INDEX start: 10441 length 86
          Stream: column 8 section BLOOM_FILTER start: 10527 length 1829
    - Stream: column 9 section ROW_INDEX start: 12356 length 51
    - Stream: column 9 section BLOOM_FILTER start: 12407 length 95
    - Stream: column 10 section ROW_INDEX start: 12502 length 82
    - Stream: column 10 section BLOOM_FILTER start: 12584 length 1994
    - Stream: column 11 section ROW_INDEX start: 14578 length 47
    - Stream: column 11 section BLOOM_FILTER start: 14625 length 474
    - Stream: column 1 section PRESENT start: 15099 length 17
    - Stream: column 1 section DATA start: 15116 length 962
    - Stream: column 2 section PRESENT start: 16078 length 17
    - Stream: column 2 section DATA start: 16095 length 1441
    - Stream: column 3 section DATA start: 17536 length 1704
    - Stream: column 4 section DATA start: 19240 length 1998
    - Stream: column 5 section DATA start: 21238 length 2925
    - Stream: column 6 section DATA start: 24163 length 3323
    - Stream: column 7 section DATA start: 27486 length 137
    - Stream: column 8 section DATA start: 27623 length 1572
    - Stream: column 8 section LENGTH start: 29195 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548
    - Stream: column 9 section DATA start: 31053 length 62
    - Stream: column 9 section SECONDARY start: 31115 length 1783
    - Stream: column 10 section DATA start: 32898 length 2138
    - Stream: column 10 section SECONDARY start: 35036 length 231
    - Stream: column 11 section DATA start: 35267 length 1877
    - Stream: column 11 section LENGTH start: 37144 length 591
    + Stream: column 9 section ROW_INDEX start: 12356 length 50
    + Stream: column 9 section BLOOM_FILTER start: 12406 length 95
    + Stream: column 10 section ROW_INDEX start: 12501 length 82
    + Stream: column 10 section BLOOM_FILTER start: 12583 length 1994
    + Stream: column 11 section ROW_INDEX start: 14577 length 47
    + Stream: column 11 section BLOOM_FILTER start: 14624 length 474
    + Stream: column 1 section PRESENT start: 15098 length 17
    + Stream: column 1 section DATA start: 15115 length 962
    + Stream: column 2 section PRESENT start: 16077 length 17
    + Stream: column 2 section DATA start: 16094 length 1441
    + Stream: column 3 section DATA start: 17535 length 1704
    + Stream: column 4 section DATA start: 19239 length 1998
    + Stream: column 5 section DATA start: 21237 length 2925
    + Stream: column 6 section DATA start: 24162 length 3323
    + Stream: column 7 section DATA start: 27485 length 137
    + Stream: column 8 section DATA start: 27622 length 1572
    + Stream: column 8 section LENGTH start: 29194 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 29504 length 1548
    + Stream: column 9 section DATA start: 31052 length 19
    + Stream: column 9 section SECONDARY start: 31071 length 1783
    + Stream: column 10 section DATA start: 32854 length 2138
    + Stream: column 10 section SECONDARY start: 34992 length 231
    + Stream: column 11 section DATA start: 35223 length 1877
    + Stream: column 11 section LENGTH start: 37100 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -310,7 +310,7 @@ Stripes:
            Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
            Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8

    -File length: 38610 bytes
    +File length: 38565 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -377,7 +377,7 @@ File Statistics:
        Column 11: count: 1049 hasNull: false sum: 13278

      Stripes:
    - Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944
    + Stripe: offset: 3 data: 22593 rows: 1049 tail: 250 index: 9943
          Stream: column 0 section ROW_INDEX start: 3 length 20
          Stream: column 0 section BLOOM_FILTER start: 23 length 45
          Stream: column 1 section ROW_INDEX start: 68 length 58
    @@ -396,30 +396,30 @@ Stripes:
          Stream: column 7 section BLOOM_FILTER start: 6812 length 45
          Stream: column 8 section ROW_INDEX start: 6857 length 86
          Stream: column 8 section BLOOM_FILTER start: 6943 length 1157
    - Stream: column 9 section ROW_INDEX start: 8100 length 51
    - Stream: column 9 section BLOOM_FILTER start: 8151 length 62
    - Stream: column 10 section ROW_INDEX start: 8213 length 82
    - Stream: column 10 section BLOOM_FILTER start: 8295 length 1297
    - Stream: column 11 section ROW_INDEX start: 9592 length 47
    - Stream: column 11 section BLOOM_FILTER start: 9639 length 308
    - Stream: column 1 section PRESENT start: 9947 length 17
    - Stream: column 1 section DATA start: 9964 length 962
    - Stream: column 2 section PRESENT start: 10926 length 17
    - Stream: column 2 section DATA start: 10943 length 1441
    - Stream: column 3 section DATA start: 12384 length 1704
    - Stream: column 4 section DATA start: 14088 length 1998
    - Stream: column 5 section DATA start: 16086 length 2925
    - Stream: column 6 section DATA start: 19011 length 3323
    - Stream: column 7 section DATA start: 22334 length 137
    - Stream: column 8 section DATA start: 22471 length 1572
    - Stream: column 8 section LENGTH start: 24043 length 310
    - Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548
    - Stream: column 9 section DATA start: 25901 length 62
    - Stream: column 9 section SECONDARY start: 25963 length 1783
    - Stream: column 10 section DATA start: 27746 length 2138
    - Stream: column 10 section SECONDARY start: 29884 length 231
    - Stream: column 11 section DATA start: 30115 length 1877
    - Stream: column 11 section LENGTH start: 31992 length 591
    + Stream: column 9 section ROW_INDEX start: 8100 length 50
    + Stream: column 9 section BLOOM_FILTER start: 8150 length 62
    + Stream: column 10 section ROW_INDEX start: 8212 length 82
    + Stream: column 10 section BLOOM_FILTER start: 8294 length 1297
    + Stream: column 11 section ROW_INDEX start: 9591 length 47
    + Stream: column 11 section BLOOM_FILTER start: 9638 length 308
    + Stream: column 1 section PRESENT start: 9946 length 17
    + Stream: column 1 section DATA start: 9963 length 962
    + Stream: column 2 section PRESENT start: 10925 length 17
    + Stream: column 2 section DATA start: 10942 length 1441
    + Stream: column 3 section DATA start: 12383 length 1704
    + Stream: column 4 section DATA start: 14087 length 1998
    + Stream: column 5 section DATA start: 16085 length 2925
    + Stream: column 6 section DATA start: 19010 length 3323
    + Stream: column 7 section DATA start: 22333 length 137
    + Stream: column 8 section DATA start: 22470 length 1572
    + Stream: column 8 section LENGTH start: 24042 length 310
    + Stream: column 8 section DICTIONARY_DATA start: 24352 length 1548
    + Stream: column 9 section DATA start: 25900 length 19
    + Stream: column 9 section SECONDARY start: 25919 length 1783
    + Stream: column 10 section DATA start: 27702 length 2138
    + Stream: column 10 section SECONDARY start: 29840 length 231
    + Stream: column 11 section DATA start: 30071 length 1877
    + Stream: column 11 section LENGTH start: 31948 length 591
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT
          Encoding column 2: DIRECT_V2
    @@ -440,7 +440,7 @@ Stripes:
            Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
            Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5

    -File length: 33456 bytes
    +File length: 33413 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --

    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/results/clientpositive/orc_merge11.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/orc_merge11.q.out b/ql/src/test/results/clientpositive/orc_merge11.q.out
    index da608db..1c4eb0a 100644
    --- a/ql/src/test/results/clientpositive/orc_merge11.q.out
    +++ b/ql/src/test/results/clientpositive/orc_merge11.q.out
    @@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1
      #### A masked pattern was here ####
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 50000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -96,22 +96,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -120,22 +120,22 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6828 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 50000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -155,22 +155,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -179,12 +179,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6828 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1
      #### A masked pattern was here ####
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 100000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -244,22 +244,22 @@ File Statistics:
        Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -268,26 +268,26 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
    - Stream: column 0 section ROW_INDEX start: 10733 length 17
    - Stream: column 1 section ROW_INDEX start: 10750 length 85
    - Stream: column 2 section ROW_INDEX start: 10835 length 87
    - Stream: column 3 section ROW_INDEX start: 10922 length 111
    - Stream: column 4 section ROW_INDEX start: 11033 length 108
    - Stream: column 5 section ROW_INDEX start: 11141 length 101
    - Stream: column 1 section DATA start: 11242 length 871
    - Stream: column 2 section DATA start: 12113 length 362
    - Stream: column 2 section LENGTH start: 12475 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
    - Stream: column 3 section DATA start: 12506 length 5167
    - Stream: column 4 section DATA start: 17673 length 524
    - Stream: column 4 section SECONDARY start: 18197 length 118
    - Stream: column 5 section DATA start: 18315 length 2913
    - Stream: column 5 section SECONDARY start: 21228 length 118
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
    + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
    + Stream: column 0 section ROW_INDEX start: 6511 length 17
    + Stream: column 1 section ROW_INDEX start: 6528 length 83
    + Stream: column 2 section ROW_INDEX start: 6611 length 81
    + Stream: column 3 section ROW_INDEX start: 6692 length 111
    + Stream: column 4 section ROW_INDEX start: 6803 length 110
    + Stream: column 5 section ROW_INDEX start: 6913 length 96
    + Stream: column 1 section DATA start: 7009 length 45
    + Stream: column 2 section DATA start: 7054 length 41
    + Stream: column 2 section LENGTH start: 7095 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
    + Stream: column 3 section DATA start: 7126 length 5167
    + Stream: column 4 section DATA start: 12293 length 524
    + Stream: column 4 section SECONDARY start: 12817 length 18
    + Stream: column 5 section DATA start: 12835 length 53
    + Stream: column 5 section SECONDARY start: 12888 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -296,12 +296,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 21814 bytes
    +File length: 13348 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --

    http://git-wip-us.apache.org/repos/asf/hive/blob/5628594b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    index da608db..1c4eb0a 100644
    --- a/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    +++ b/ql/src/test/results/clientpositive/tez/orc_merge11.q.out
    @@ -72,11 +72,11 @@ PREHOOK: Input: default@orcfile_merge1
      #### A masked pattern was here ####
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 50000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -96,22 +96,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -120,22 +120,22 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6828 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 50000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -155,22 +155,22 @@ File Statistics:
        Column 5: count: 50000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -179,12 +179,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 11071 bytes
    +File length: 6828 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
    @@ -213,11 +213,11 @@ PREHOOK: Input: default@orcfile_merge1
      #### A masked pattern was here ####
      -- BEGIN ORC FILE DUMP --
      #### A masked pattern was here ####
    -File Version: 0.12 with HIVE_4243
    +File Version: 0.12 with HIVE_8732
      Rows: 100000
      Compression: ZLIB
      Compression size: 4096
    -Type: struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(10,0),ts:timestamp>
    +Type: struct<_col0:bigint,_col1:string,_col2:double,_col3:decimal(10,0),_col4:timestamp>

      Stripe Statistics:
        Stripe 1:
    @@ -244,22 +244,22 @@ File Statistics:
        Column 5: count: 100000 hasNull: false min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:04:10.0

      Stripes:
    - Stripe: offset: 3 data: 10104 rows: 50000 tail: 117 index: 509
    + Stripe: offset: 3 data: 5897 rows: 50000 tail: 113 index: 498
          Stream: column 0 section ROW_INDEX start: 3 length 17
    - Stream: column 1 section ROW_INDEX start: 20 length 85
    - Stream: column 2 section ROW_INDEX start: 105 length 87
    - Stream: column 3 section ROW_INDEX start: 192 length 111
    - Stream: column 4 section ROW_INDEX start: 303 length 108
    - Stream: column 5 section ROW_INDEX start: 411 length 101
    - Stream: column 1 section DATA start: 512 length 871
    - Stream: column 2 section DATA start: 1383 length 362
    - Stream: column 2 section LENGTH start: 1745 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 1753 length 23
    - Stream: column 3 section DATA start: 1776 length 5167
    - Stream: column 4 section DATA start: 6943 length 524
    - Stream: column 4 section SECONDARY start: 7467 length 118
    - Stream: column 5 section DATA start: 7585 length 2913
    - Stream: column 5 section SECONDARY start: 10498 length 118
    + Stream: column 1 section ROW_INDEX start: 20 length 83
    + Stream: column 2 section ROW_INDEX start: 103 length 81
    + Stream: column 3 section ROW_INDEX start: 184 length 111
    + Stream: column 4 section ROW_INDEX start: 295 length 110
    + Stream: column 5 section ROW_INDEX start: 405 length 96
    + Stream: column 1 section DATA start: 501 length 45
    + Stream: column 2 section DATA start: 546 length 41
    + Stream: column 2 section LENGTH start: 587 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 595 length 23
    + Stream: column 3 section DATA start: 618 length 5167
    + Stream: column 4 section DATA start: 5785 length 524
    + Stream: column 4 section SECONDARY start: 6309 length 18
    + Stream: column 5 section DATA start: 6327 length 53
    + Stream: column 5 section SECONDARY start: 6380 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -268,26 +268,26 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    - Stripe: offset: 10733 data: 10104 rows: 50000 tail: 117 index: 509
    - Stream: column 0 section ROW_INDEX start: 10733 length 17
    - Stream: column 1 section ROW_INDEX start: 10750 length 85
    - Stream: column 2 section ROW_INDEX start: 10835 length 87
    - Stream: column 3 section ROW_INDEX start: 10922 length 111
    - Stream: column 4 section ROW_INDEX start: 11033 length 108
    - Stream: column 5 section ROW_INDEX start: 11141 length 101
    - Stream: column 1 section DATA start: 11242 length 871
    - Stream: column 2 section DATA start: 12113 length 362
    - Stream: column 2 section LENGTH start: 12475 length 8
    - Stream: column 2 section DICTIONARY_DATA start: 12483 length 23
    - Stream: column 3 section DATA start: 12506 length 5167
    - Stream: column 4 section DATA start: 17673 length 524
    - Stream: column 4 section SECONDARY start: 18197 length 118
    - Stream: column 5 section DATA start: 18315 length 2913
    - Stream: column 5 section SECONDARY start: 21228 length 118
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391
    + Stripe: offset: 6511 data: 5897 rows: 50000 tail: 113 index: 498
    + Stream: column 0 section ROW_INDEX start: 6511 length 17
    + Stream: column 1 section ROW_INDEX start: 6528 length 83
    + Stream: column 2 section ROW_INDEX start: 6611 length 81
    + Stream: column 3 section ROW_INDEX start: 6692 length 111
    + Stream: column 4 section ROW_INDEX start: 6803 length 110
    + Stream: column 5 section ROW_INDEX start: 6913 length 96
    + Stream: column 1 section DATA start: 7009 length 45
    + Stream: column 2 section DATA start: 7054 length 41
    + Stream: column 2 section LENGTH start: 7095 length 8
    + Stream: column 2 section DICTIONARY_DATA start: 7103 length 23
    + Stream: column 3 section DATA start: 7126 length 5167
    + Stream: column 4 section DATA start: 12293 length 524
    + Stream: column 4 section SECONDARY start: 12817 length 18
    + Stream: column 5 section DATA start: 12835 length 53
    + Stream: column 5 section SECONDARY start: 12888 length 18
          Encoding column 0: DIRECT
          Encoding column 1: DIRECT_V2
          Encoding column 2: DICTIONARY_V2[6]
    @@ -296,12 +296,12 @@ Stripes:
          Encoding column 5: DIRECT_V2
          Row group indices for column 1:
            Entry 0: count: 10000 hasNull: false min: 2 max: 100 sum: 999815 positions: 0,0,0
    - Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 133,1071,391
    - Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 292,2147,391
    - Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 453,3223,391
    - Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 683,203,391
    + Entry 1: count: 10000 hasNull: false min: 29 max: 100 sum: 999899 positions: 0,101,391
    + Entry 2: count: 10000 hasNull: false min: 2 max: 100 sum: 999807 positions: 0,207,391
    + Entry 3: count: 10000 hasNull: false min: 13 max: 100 sum: 999842 positions: 0,313,391
    + Entry 4: count: 10000 hasNull: false min: 5 max: 100 sum: 999875 positions: 0,419,391

    -File length: 21814 bytes
    +File length: 13348 bytes
      Padding length: 0 bytes
      Padding ratio: 0%
      -- END ORC FILE DUMP --
