Author: kevinwilfong
Date: Tue Mar 5 20:44:50 2013
New Revision: 1452992

URL: http://svn.apache.org/r1452992
Log:
HIVE-3874. Create a new Optimized Row Columnar file format for Hive. (Owen O'Malley via kevinwilfong)

Added:
     hive/trunk/ql/src/gen/protobuf/
     hive/trunk/ql/src/gen/protobuf/gen-java/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/
     hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/package-info.java
     hive/trunk/ql/src/protobuf/
     hive/trunk/ql/src/protobuf/org/
     hive/trunk/ql/src/protobuf/org/apache/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/
     hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java
     hive/trunk/ql/src/test/resources/orc-file-dump.out
Modified:
     hive/trunk/build.properties
     hive/trunk/build.xml
     hive/trunk/ivy/libraries.properties
     hive/trunk/ql/build.xml
     hive/trunk/ql/ivy.xml

Modified: hive/trunk/build.properties
URL: http://svn.apache.org/viewvc/hive/trunk/build.properties?rev=1452992&r1=1452991&r2=1452992&view=diff
==============================================================================
--- hive/trunk/build.properties (original)
+++ hive/trunk/build.properties Tue Mar 5 20:44:50 2013
@@ -76,6 +76,7 @@ iterate.hive.all=ant,shims,common,serde,
  iterate.hive.modules=shims,common,serde,metastore,ql,contrib,service,cli,jdbc,hwi,hbase-handler,pdk,builtins
  iterate.hive.tests=ql,contrib,hbase-handler,hwi,jdbc,metastore,odbc,serde,service
  iterate.hive.thrift=ql,service,metastore,serde
+iterate.hive.protobuf=ql
  iterate.hive.cpp=odbc

  #

Modified: hive/trunk/build.xml
URL: http://svn.apache.org/viewvc/hive/trunk/build.xml?rev=1452992&r1=1452991&r2=1452992&view=diff
==============================================================================
--- hive/trunk/build.xml (original)
+++ hive/trunk/build.xml Tue Mar 5 20:44:50 2013
@@ -256,6 +256,11 @@
      <iterate target="thriftif" iterate="${iterate.hive.thrift}"/>
    </target>

+ <target name="protobuf">
+ <echo message="Project: ${ant.project.name}"/>
+ <iterate target="protobuf" iterate="${iterate.hive.protobuf}"/>
+ </target>
+
    <target name="jar"
            depends="init"
            description="Build JAR artifacts">

Modified: hive/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hive/trunk/ivy/libraries.properties?rev=1452992&r1=1452991&r2=1452992&view=diff
==============================================================================
--- hive/trunk/ivy/libraries.properties (original)
+++ hive/trunk/ivy/libraries.properties Tue Mar 5 20:44:50 2013
@@ -60,9 +60,11 @@ libthrift.version=0.9.0
  log4j.version=1.2.16
  maven-ant-tasks.version=2.1.0
  mockito-all.version=1.8.2
+protobuf.version=2.4.1
  rat.version=0.8
  slf4j-api.version=1.6.1
  slf4j-log4j12.version=1.6.1
+snappy.version=0.2
  velocity.version=1.5
  zookeeper.version=3.4.3
  javolution.version=5.5.1

Modified: hive/trunk/ql/build.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/build.xml?rev=1452992&r1=1452991&r2=1452992&view=diff
==============================================================================
--- hive/trunk/ql/build.xml (original)
+++ hive/trunk/ql/build.xml Tue Mar 5 20:44:50 2013
@@ -22,6 +22,9 @@

    <property name="ql.lib.dir" value="${basedir}/lib"/>
    <property name="src.dir" location="${basedir}/src/java"/>
+ <property name="protobuf.src.dir" location="${basedir}/src/protobuf"/>
+ <property name="protobuf.build.dir"
+ location="${basedir}/src/gen/protobuf/gen-java"/>
    <property name="ql.test.query.dir" location="${basedir}/src/test/queries"/>
    <property name="ql.test.template.dir" location="${basedir}/src/test/templates"/>
    <property name="ql.test.results.dir" location="${basedir}/src/test/results"/>
@@ -147,16 +150,28 @@
      </java>
    </target>

+ <target name="protobuf">
+ <echo message="Project: ${ant.project.name}"/>
+ <echo>Building ORC Protobuf</echo>
+ <mkdir dir="${protobuf.build.dir}"/>
+ <exec executable="protoc" failonerror="true">
+ <arg value="--java_out=${protobuf.build.dir}"/>
+ <arg value="-I=${protobuf.src.dir}/org/apache/hadoop/hive/ql/io/orc"/>
+ <arg value="${protobuf.src.dir}/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto"/>
+ </exec>
+ </target>
+
    <target name="ql-init">
      <echo message="Project: ${ant.project.name}"/>
      <mkdir dir="${build.dir}/gen/antlr/gen-java/org/apache/hadoop/hive/ql/parse"/>
    </target>

- <target name="compile" depends="init, ql-init, ivy-retrieve, build-grammar">
+ <target name="compile"
+ depends="init, ql-init, ivy-retrieve, build-grammar">
      <echo message="Project: ${ant.project.name}"/>
      <javac
       encoding="${build.encoding}"
- srcdir="${src.dir}:${basedir}/src/gen/thrift/gen-javabean:${build.dir}/gen/antlr/gen-java"
+ srcdir="${src.dir}:${basedir}/src/gen/thrift/gen-javabean:${build.dir}/gen/antlr/gen-java:${protobuf.build.dir}"
       includes="**/*.java"
       destdir="${build.classes}"
       debug="${javac.debug}"

Modified: hive/trunk/ql/ivy.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/ivy.xml?rev=1452992&r1=1452991&r2=1452992&view=diff
==============================================================================
--- hive/trunk/ql/ivy.xml (original)
+++ hive/trunk/ql/ivy.xml Tue Mar 5 20:44:50 2013
@@ -38,6 +38,10 @@
                  conf="test->default" transitive="false"/>
      <dependency org="org.antlr" name="antlr" rev="${antlr.version}"
                  transitive="false"/>
+ <dependency org="com.google.protobuf" name="protobuf-java"
+ rev="${protobuf.version}" transitive="false"/>
+ <dependency org="org.iq80.snappy" name="snappy"
+ rev="${snappy.version}" transitive="false"/>

      <!-- hadoop specific guava -->
      <dependency org="com.google.guava" name="guava" rev="${guava-hadoop20.version}"


  • Kevinwilfong at Mar 5, 2013 at 8:45 pm
    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RedBlackTree.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,326 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * A memory-efficient red-black tree that does not allocate any objects per
    + * element. This class is abstract and assumes that the child class
    + * handles the key and comparisons with the key.
    + */
    +abstract class RedBlackTree {
    + public static final int NULL = -1;
    + private static final int DEFAULT_INITIAL_CAPACITY = 16 * 1024;
    +
    + // Various values controlling the offset of the data within the array.
    + private static final int LEFT_OFFSET = 0;
    + private static final int RIGHT_OFFSET = 1;
    + private static final int COUNT_OFFSET = 2;
    + private static final int ELEMENT_SIZE = 3;
    +
    + protected int size = 0;
    + private final DynamicIntArray data;
    + protected int root = NULL;
    + protected int lastAdd = 0;
    + private boolean wasAdd = false;
    +
    + /**
    + * Create a set with a default initial capacity.
    + */
    + public RedBlackTree() {
    + data = new DynamicIntArray(DEFAULT_INITIAL_CAPACITY * ELEMENT_SIZE);
    + }
    +
    + /**
    + * Create a set with the given initial capacity.
    + */
    + public RedBlackTree(int initialCapacity) {
    + data = new DynamicIntArray(initialCapacity * ELEMENT_SIZE);
    + }
    +
    + /**
    + * Insert a new node into the data array, growing the array as necessary.
    + *
    + * @return Returns the position of the new node.
    + */
    + private int insert(int left, int right, boolean isRed) {
    + int position = size;
    + size += 1;
    + setLeft(position, left, isRed);
    + setRight(position, right);
    + setCount(position, 1);
    + return position;
    + }
    +
    + /**
    + * Compare the value at the given position to the new value.
    + * @return 0 if the values are the same, -1 if the new value is smaller and
    + * 1 if the new value is larger.
    + */
    + protected abstract int compareValue(int position);
    +
    + /**
    + * Is the given node red as opposed to black? To prevent having an extra word
    + * in the data array, we just use the low bit on the left child index.
    + */
    + protected boolean isRed(int position) {
    + return position != NULL &&
    + (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
    + }
    +
    + /**
    + * Set the red bit true or false.
    + */
    + private void setRed(int position, boolean isRed) {
    + int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
    + if (isRed) {
    + data.set(offset, data.get(offset) | 1);
    + } else {
    + data.set(offset, data.get(offset) & ~1);
    + }
    + }
    +
    + /**
    + * Get the left field of the given position.
    + */
    + protected int getLeft(int position) {
    + return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
    + }
    +
    + /**
    + * Get the right field of the given position.
    + */
    + protected int getRight(int position) {
    + return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
    + }
    +
    + protected int getCount(int position) {
    + return data.get(position * ELEMENT_SIZE + COUNT_OFFSET);
    + }
    +
    + private void setCount(int position, int value) {
    + data.set(position * ELEMENT_SIZE + COUNT_OFFSET, value);
    + }
    +
    + private void incrementCount(int position, int value) {
    + data.increment(position * ELEMENT_SIZE + COUNT_OFFSET, value);
    + }
    +
    + /**
    + * Set the left field of the given position.
    + * Note that we are storing the node color in the low bit of the left pointer.
    + */
    + private void setLeft(int position, int left) {
    + int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
    + data.set(offset, (left << 1) | (data.get(offset) & 1));
    + }
    +
    + /**
    + * Set the left field of the given position.
    + * Note that we are storing the node color in the low bit of the left pointer.
    + */
    + private void setLeft(int position, int left, boolean isRed) {
    + int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
    + data.set(offset, (left << 1) | (isRed ? 1 : 0));
    + }
    +
    + /**
    + * Set the right field of the given position.
    + */
    + private void setRight(int position, int right) {
    + data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
    + }
    +
    + /**
    + * Insert or find a given key in the tree and rebalance the tree correctly.
    + * Rebalancing restores the red-black aspect of the tree to maintain the
    + * invariants:
    + * 1. If a node is red, both of its children are black.
    + * 2. Each child of a node has the same black height (the number of black
    + * nodes between it and the leaves of the tree).
    + *
    + * Inserted nodes are at the leaves and are red, therefore there is at most a
    + * violation of rule 1 at the node we just put in. Instead of always keeping
    + * the parents, this routine passes the context down.
    + *
    + * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
    + * left-right mirror images of each other). See Introduction to Algorithms by
    + * Cormen, Leiserson, and Rivest for the explanation of the subcases.
    + *
    + * @param node The node that we are fixing right now.
    + * @param fromLeft Did we come down from the left?
    + * @param parent Node's parent
    + * @param grandparent Parent's parent
    + * @param greatGrandparent Grandparent's parent
    + * @return Does parent also need to be checked and/or fixed?
    + */
    + private boolean add(int node, boolean fromLeft, int parent,
    + int grandparent, int greatGrandparent) {
    + if (node == NULL) {
    + if (root == NULL) {
    + lastAdd = insert(NULL, NULL, false);
    + root = lastAdd;
    + wasAdd = true;
    + return false;
    + } else {
    + lastAdd = insert(NULL, NULL, true);
    + node = lastAdd;
    + wasAdd = true;
    + // connect the new node into the tree
    + if (fromLeft) {
    + setLeft(parent, node);
    + } else {
    + setRight(parent, node);
    + }
    + }
    + } else {
    + int compare = compareValue(node);
    + boolean keepGoing;
    +
    + // Recurse down to find where the node needs to be added
    + if (compare < 0) {
    + keepGoing = add(getLeft(node), true, node, parent, grandparent);
    + } else if (compare > 0) {
    + keepGoing = add(getRight(node), false, node, parent, grandparent);
    + } else {
    + lastAdd = node;
    + wasAdd = false;
    + incrementCount(node, 1);
    + return false;
    + }
    +
    + // we don't need to fix the root (because it is always set to black)
    + if (node == root || !keepGoing) {
    + return false;
    + }
    + }
    +
    +
    + // Do we need to fix this node? Only if there are two reds right under each
    + // other.
    + if (isRed(node) && isRed(parent)) {
    + if (parent == getLeft(grandparent)) {
    + int uncle = getRight(grandparent);
    + if (isRed(uncle)) {
    + // case 1.1
    + setRed(parent, false);
    + setRed(uncle, false);
    + setRed(grandparent, true);
    + return true;
    + } else {
    + if (node == getRight(parent)) {
    + // case 1.2
    + // swap node and parent
    + int tmp = node;
    + node = parent;
    + parent = tmp;
    + // left-rotate on node
    + setLeft(grandparent, parent);
    + setRight(node, getLeft(parent));
    + setLeft(parent, node);
    + }
    +
    + // case 1.2 and 1.3
    + setRed(parent, false);
    + setRed(grandparent, true);
    +
    + // right-rotate on grandparent
    + if (greatGrandparent == NULL) {
    + root = parent;
    + } else if (getLeft(greatGrandparent) == grandparent) {
    + setLeft(greatGrandparent, parent);
    + } else {
    + setRight(greatGrandparent, parent);
    + }
    + setLeft(grandparent, getRight(parent));
    + setRight(parent, grandparent);
    + return false;
    + }
    + } else {
    + int uncle = getLeft(grandparent);
    + if (isRed(uncle)) {
    + // case 2.1
    + setRed(parent, false);
    + setRed(uncle, false);
    + setRed(grandparent, true);
    + return true;
    + } else {
    + if (node == getLeft(parent)) {
    + // case 2.2
    + // swap node and parent
    + int tmp = node;
    + node = parent;
    + parent = tmp;
    + // right-rotate on node
    + setRight(grandparent, parent);
    + setLeft(node, getRight(parent));
    + setRight(parent, node);
    + }
    + // case 2.2 and 2.3
    + setRed(parent, false);
    + setRed(grandparent, true);
    + // left-rotate on grandparent
    + if (greatGrandparent == NULL) {
    + root = parent;
    + } else if (getRight(greatGrandparent) == grandparent) {
    + setRight(greatGrandparent, parent);
    + } else {
    + setLeft(greatGrandparent, parent);
    + }
    + setRight(grandparent, getLeft(parent));
    + setLeft(parent, grandparent);
    + return false;
    + }
    + }
    + } else {
    + return true;
    + }
    + }
    +
    + /**
    + * Add the new key to the tree.
    + * @return true if the element is a new one.
    + */
    + protected boolean add() {
    + add(root, false, NULL, NULL, NULL);
    + if (wasAdd) {
    + setRed(root, false);
    + return true;
    + } else {
    + return false;
    + }
    + }
    +
    + /**
    + * Get the number of elements in the set.
    + */
    + public int size() {
    + return size;
    + }
    +
    + /**
    + * Reset the table to empty.
    + */
    + public void clear() {
    + root = NULL;
    + size = 0;
    + data.clear();
    + }
    +
    +}
    +
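    For readers skimming the diff, the node layout described in the class comment
    above (three ints per node, with the color bit packed into the low bit of the
    left-child field) can be sketched on its own. The class and method names below
    are illustrative only, and a plain int[] stands in for DynamicIntArray:

      // Standalone sketch of the bit-packing used by RedBlackTree above.
      public class PackedNodeSketch {
        private static final int LEFT = 0, RIGHT = 1, COUNT = 2, ELEMENT_SIZE = 3;
        private final int[] data = new int[16 * ELEMENT_SIZE];

        void setLeft(int position, int left, boolean isRed) {
          data[position * ELEMENT_SIZE + LEFT] = (left << 1) | (isRed ? 1 : 0);
        }

        int getLeft(int position) {
          // arithmetic shift keeps the NULL sentinel (-1) intact
          return data[position * ELEMENT_SIZE + LEFT] >> 1;
        }

        boolean isRed(int position) {
          return (data[position * ELEMENT_SIZE + LEFT] & 1) == 1;
        }

        public static void main(String[] args) {
          PackedNodeSketch sketch = new PackedNodeSketch();
          sketch.setLeft(0, -1, true);              // a red node with no left child
          System.out.println(sketch.getLeft(0));    // -1
          System.out.println(sketch.isRed(0));      // true
        }
      }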

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,111 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.EOFException;
    +import java.io.IOException;
    +
    +/**
    + * A reader that reads a sequence of bytes. A control byte is read before
    + * each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
    + * byte is -1 to -128, 1 to 128 literal byte values follow.
    + */
    +class RunLengthByteReader {
    + private final InStream input;
    + private final byte[] literals =
    + new byte[RunLengthByteWriter.MAX_LITERAL_SIZE];
    + private int numLiterals = 0;
    + private int used = 0;
    + private boolean repeat = false;
    +
    + RunLengthByteReader(InStream input) throws IOException {
    + this.input = input;
    + }
    +
    + private void readValues() throws IOException {
    + int control = input.read();
    + used = 0;
    + if (control == -1) {
    + throw new EOFException("Read past end of buffer RLE byte from " + input);
    + } else if (control < 0x80) {
    + repeat = true;
    + numLiterals = control + RunLengthByteWriter.MIN_REPEAT_SIZE;
    + int val = input.read();
    + if (val == -1) {
    + throw new EOFException("Reading RLE byte got EOF");
    + }
    + literals[0] = (byte) val;
    + } else {
    + repeat = false;
    + numLiterals = 0x100 - control;
    + int bytes = 0;
    + while (bytes < numLiterals) {
    + int result = input.read(literals, bytes, numLiterals - bytes);
    + if (result == -1) {
    + throw new EOFException("Reading RLE byte literal got EOF");
    + }
    + bytes += result;
    + }
    + }
    + }
    +
    + boolean hasNext() throws IOException {
    + return used != numLiterals || input.available() > 0;
    + }
    +
    + byte next() throws IOException {
    + byte result;
    + if (used == numLiterals) {
    + readValues();
    + }
    + if (repeat) {
    + used += 1;
    + result = literals[0];
    + } else {
    + result = literals[used++];
    + }
    + return result;
    + }
    +
    + void seek(PositionProvider index) throws IOException {
    + input.seek(index);
    + int consumed = (int) index.getNext();
    + if (consumed != 0) {
    + // a loop is required for cases where we break the run into two parts
    + while (consumed > 0) {
    + readValues();
    + used = consumed;
    + consumed -= numLiterals;
    + }
    + } else {
    + used = 0;
    + numLiterals = 0;
    + }
    + }
    +
    + void skip(long items) throws IOException {
    + while (items > 0) {
    + if (used == numLiterals) {
    + readValues();
    + }
    + long consume = Math.min(items, numLiterals - used);
    + used += consume;
    + items -= consume;
    + }
    + }
    +}
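    The control-byte framing described in the comment above (and produced by the
    matching RunLengthByteWriter that follows) can be exercised outside ORC. Below
    is a minimal standalone round trip, not the commit's classes; names are
    illustrative:

      import java.io.ByteArrayInputStream;
      import java.io.ByteArrayOutputStream;
      import java.io.IOException;

      // Byte RLE framing: control 0..127 => (control + 3) copies of the next byte;
      // control -1..-128 (0x81..0xFF unsigned) => (256 - control) literal bytes.
      public class ByteRleSketch {
        static void encodeRun(ByteArrayOutputStream out, byte value, int repetitions) {
          out.write(repetitions - 3);      // 3..130 repetitions map to control 0..127
          out.write(value);
        }

        static void encodeLiterals(ByteArrayOutputStream out, byte[] literals) {
          out.write(-literals.length);     // 1..128 literals map to control -1..-128
          out.write(literals, 0, literals.length);
        }

        static void decode(ByteArrayInputStream in, ByteArrayOutputStream result)
            throws IOException {
          int control;
          while ((control = in.read()) != -1) {
            if (control < 0x80) {                    // repeat run
              int repetitions = control + 3;
              int value = in.read();
              for (int i = 0; i < repetitions; ++i) {
                result.write(value);
              }
            } else {                                 // literal run
              int count = 0x100 - control;
              for (int i = 0; i < count; ++i) {
                result.write(in.read());
              }
            }
          }
        }

        public static void main(String[] args) throws IOException {
          ByteArrayOutputStream encoded = new ByteArrayOutputStream();
          encodeRun(encoded, (byte) 7, 5);                 // 7 written five times
          encodeLiterals(encoded, new byte[]{1, 2, 3});    // three literal bytes
          ByteArrayOutputStream decoded = new ByteArrayOutputStream();
          decode(new ByteArrayInputStream(encoded.toByteArray()), decoded);
          System.out.println(java.util.Arrays.toString(decoded.toByteArray()));
          // prints [7, 7, 7, 7, 7, 1, 2, 3]
        }
      }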

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteWriter.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,106 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +
    +/**
    + * A writer that writes a sequence of bytes. A control byte is written before
    + * each run, with positive values 0 to 127 meaning 3 to 130 repetitions. If the
    + * byte is -1 to -128, 1 to 128 literal byte values follow.
    + */
    +class RunLengthByteWriter {
    + static final int MIN_REPEAT_SIZE = 3;
    + static final int MAX_LITERAL_SIZE = 128;
    + static final int MAX_REPEAT_SIZE= 127 + MIN_REPEAT_SIZE;
    + private final PositionedOutputStream output;
    + private final byte[] literals = new byte[MAX_LITERAL_SIZE];
    + private int numLiterals = 0;
    + private boolean repeat = false;
    + private int tailRunLength = 0;
    +
    + RunLengthByteWriter(PositionedOutputStream output) {
    + this.output = output;
    + }
    +
    + private void writeValues() throws IOException {
    + if (numLiterals != 0) {
    + if (repeat) {
    + output.write(numLiterals - MIN_REPEAT_SIZE);
    + output.write(literals, 0, 1);
    + } else {
    + output.write(-numLiterals);
    + output.write(literals, 0, numLiterals);
    + }
    + repeat = false;
    + tailRunLength = 0;
    + numLiterals = 0;
    + }
    + }
    +
    + void flush() throws IOException {
    + writeValues();
    + output.flush();
    + }
    +
    + void write(byte value) throws IOException {
    + if (numLiterals == 0) {
    + literals[numLiterals++] = value;
    + tailRunLength = 1;
    + } else if (repeat) {
    + if (value == literals[0]) {
    + numLiterals += 1;
    + if (numLiterals == MAX_REPEAT_SIZE) {
    + writeValues();
    + }
    + } else {
    + writeValues();
    + literals[numLiterals++] = value;
    + tailRunLength = 1;
    + }
    + } else {
    + if (value == literals[numLiterals - 1]) {
    + tailRunLength += 1;
    + } else {
    + tailRunLength = 1;
    + }
    + if (tailRunLength == MIN_REPEAT_SIZE) {
    + if (numLiterals + 1 == MIN_REPEAT_SIZE) {
    + repeat = true;
    + numLiterals += 1;
    + } else {
    + numLiterals -= MIN_REPEAT_SIZE - 1;
    + writeValues();
    + literals[0] = value;
    + repeat = true;
    + numLiterals = MIN_REPEAT_SIZE;
    + }
    + } else {
    + literals[numLiterals++] = value;
    + if (numLiterals == MAX_LITERAL_SIZE) {
    + writeValues();
    + }
    + }
    + }
    + }
    +
    + void getPosition(PositionRecorder recorder) throws IOException {
    + output.getPosition(recorder);
    + recorder.addPosition(numLiterals);
    + }
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,117 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.EOFException;
    +import java.io.IOException;
    +
    +/**
    + * A reader that reads a sequence of integers.
    + */
    +class RunLengthIntegerReader {
    + private final InStream input;
    + private final boolean signed;
    + private final long[] literals =
    + new long[RunLengthIntegerWriter.MAX_LITERAL_SIZE];
    + private int numLiterals = 0;
    + private int delta = 0;
    + private int used = 0;
    + private boolean repeat = false;
    +
    + RunLengthIntegerReader(InStream input, boolean signed) throws IOException {
    + this.input = input;
    + this.signed = signed;
    + }
    +
    + private void readValues() throws IOException {
    + int control = input.read();
    + if (control == -1) {
    + throw new EOFException("Read past end of RLE integer from " + input);
    + } else if (control < 0x80) {
    + numLiterals = control + RunLengthIntegerWriter.MIN_REPEAT_SIZE;
    + used = 0;
    + repeat = true;
    + delta = input.read();
    + if (delta == -1) {
    + throw new EOFException("End of stream in RLE Integer from " + input);
    + }
    + // convert from 0 to 255 to -128 to 127 by converting to a signed byte
    + delta = (byte) (0 + delta);
    + if (signed) {
    + literals[0] = SerializationUtils.readVslong(input);
    + } else {
    + literals[0] = SerializationUtils.readVulong(input);
    + }
    + } else {
    + repeat = false;
    + numLiterals = 0x100 - control;
    + used = 0;
    + for(int i=0; i < numLiterals; ++i) {
    + if (signed) {
    + literals[i] = SerializationUtils.readVslong(input);
    + } else {
    + literals[i] = SerializationUtils.readVulong(input);
    + }
    + }
    + }
    + }
    +
    + boolean hasNext() throws IOException {
    + return used != numLiterals || input.available() > 0;
    + }
    +
    + long next() throws IOException {
    + long result;
    + if (used == numLiterals) {
    + readValues();
    + }
    + if (repeat) {
    + result = literals[0] + (used++) * delta;
    + } else {
    + result = literals[used++];
    + }
    + return result;
    + }
    +
    + void seek(PositionProvider index) throws IOException {
    + input.seek(index);
    + int consumed = (int) index.getNext();
    + if (consumed != 0) {
    + // a loop is required for cases where we break the run into two parts
    + while (consumed > 0) {
    + readValues();
    + used = consumed;
    + consumed -= numLiterals;
    + }
    + } else {
    + used = 0;
    + numLiterals = 0;
    + }
    + }
    +
    + void skip(long numValues) throws IOException {
    + while (numValues > 0) {
    + if (used == numLiterals) {
    + readValues();
    + }
    + long consume = Math.min(numValues, numLiterals - used);
    + used += consume;
    + numValues -= consume;
    + }
    + }
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriter.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,137 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +
    +/**
    + * A writer that writes a sequence of integers. A control byte is written before
    + * each run, with positive values 0 to 127 meaning 3 to 130 repetitions; each
    + * repetition is offset by a delta. If the control byte is -1 to -128, 1 to 128
    + * literal vint values follow.
    + */
    +class RunLengthIntegerWriter {
    + static final int MIN_REPEAT_SIZE = 3;
    + static final int MAX_DELTA = 127;
    + static final int MIN_DELTA = -128;
    + static final int MAX_LITERAL_SIZE = 128;
    + private static final int MAX_REPEAT_SIZE = 127 + MIN_REPEAT_SIZE;
    + private final PositionedOutputStream output;
    + private final boolean signed;
    + private final long[] literals = new long[MAX_LITERAL_SIZE];
    + private int numLiterals = 0;
    + private long delta = 0;
    + private boolean repeat = false;
    + private int tailRunLength = 0;
    +
    + RunLengthIntegerWriter(PositionedOutputStream output,
    + boolean signed) {
    + this.output = output;
    + this.signed = signed;
    + }
    +
    + private void writeValues() throws IOException {
    + if (numLiterals != 0) {
    + if (repeat) {
    + output.write(numLiterals - MIN_REPEAT_SIZE);
    + output.write((byte) delta);
    + if (signed) {
    + SerializationUtils.writeVslong(output, literals[0]);
    + } else {
    + SerializationUtils.writeVulong(output, literals[0]);
    + }
    + } else {
    + output.write(-numLiterals);
    + for(int i=0; i < numLiterals; ++i) {
    + if (signed) {
    + SerializationUtils.writeVslong(output, literals[i]);
    + } else {
    + SerializationUtils.writeVulong(output, literals[i]);
    + }
    + }
    + }
    + repeat = false;
    + numLiterals = 0;
    + tailRunLength = 0;
    + }
    + }
    +
    + void flush() throws IOException {
    + writeValues();
    + output.flush();
    + }
    +
    + void write(long value) throws IOException {
    + if (numLiterals == 0) {
    + literals[numLiterals++] = value;
    + tailRunLength = 1;
    + } else if (repeat) {
    + if (value == literals[0] + delta * numLiterals) {
    + numLiterals += 1;
    + if (numLiterals == MAX_REPEAT_SIZE) {
    + writeValues();
    + }
    + } else {
    + writeValues();
    + literals[numLiterals++] = value;
    + tailRunLength = 1;
    + }
    + } else {
    + if (tailRunLength == 1) {
    + delta = value - literals[numLiterals - 1];
    + if (delta < MIN_DELTA || delta > MAX_DELTA) {
    + tailRunLength = 1;
    + } else {
    + tailRunLength = 2;
    + }
    + } else if (value == literals[numLiterals - 1] + delta) {
    + tailRunLength += 1;
    + } else {
    + delta = value - literals[numLiterals - 1];
    + if (delta < MIN_DELTA || delta > MAX_DELTA) {
    + tailRunLength = 1;
    + } else {
    + tailRunLength = 2;
    + }
    + }
    + if (tailRunLength == MIN_REPEAT_SIZE) {
    + if (numLiterals + 1 == MIN_REPEAT_SIZE) {
    + repeat = true;
    + numLiterals += 1;
    + } else {
    + numLiterals -= MIN_REPEAT_SIZE - 1;
    + long base = literals[numLiterals];
    + writeValues();
    + literals[0] = base;
    + repeat = true;
    + numLiterals = MIN_REPEAT_SIZE;
    + }
    + } else {
    + literals[numLiterals++] = value;
    + if (numLiterals == MAX_LITERAL_SIZE) {
    + writeValues();
    + }
    + }
    + }
    + }
    +
    + void getPosition(PositionRecorder recorder) throws IOException {
    + output.getPosition(recorder);
    + recorder.addPosition(numLiterals);
    + }
    +}
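    A concrete example of the repeat frame described above: the run 100, 101, 102,
    103, 104 is five values with delta +1 and base 100, so it is written as control
    byte 2 (count minus MIN_REPEAT_SIZE), delta byte 1, and the base as a varint.
    The standalone sketch below (names illustrative, unsigned varints only, EOF
    checks omitted) encodes and decodes that frame:

      import java.io.ByteArrayInputStream;
      import java.io.ByteArrayOutputStream;

      public class IntegerRleSketch {
        static void writeVulong(ByteArrayOutputStream out, long value) {
          while ((value & ~0x7fL) != 0) {
            out.write((int) (0x80 | (value & 0x7f)));   // continuation byte
            value >>>= 7;
          }
          out.write((int) value);                       // final byte, high bit clear
        }

        static long readVulong(ByteArrayInputStream in) {
          long result = 0;
          int offset = 0;
          long b;
          do {
            b = in.read();
            result |= (b & 0x7f) << offset;
            offset += 7;
          } while (b >= 0x80);
          return result;
        }

        public static void main(String[] args) {
          ByteArrayOutputStream encoded = new ByteArrayOutputStream();
          encoded.write(5 - 3);         // control byte: repetitions - MIN_REPEAT_SIZE
          encoded.write(1);             // delta between consecutive values
          writeVulong(encoded, 100);    // base value as a base-128 varint

          ByteArrayInputStream in = new ByteArrayInputStream(encoded.toByteArray());
          int count = in.read() + 3;
          byte delta = (byte) in.read();
          long base = readVulong(in);
          for (int i = 0; i < count; ++i) {
            System.out.print((base + i * delta) + " ");   // 100 101 102 103 104
          }
        }
      }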

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,106 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.EOFException;
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.io.OutputStream;
    +
    +final class SerializationUtils {
    +
    + // prevent instantiation of this utility class
    + private SerializationUtils() {}
    +
    + static void writeVulong(OutputStream output, long value) throws IOException {
    + while (true) {
    + if ((value & ~0x7f) == 0) {
    + output.write((byte) value);
    + return;
    + } else {
    + output.write((byte) (0x80 | (value & 0x7f)));
    + value >>>= 7;
    + }
    + }
    + }
    +
    + static void writeVslong(OutputStream output, long value) throws IOException {
    + writeVulong(output, (value << 1) ^ (value >> 63));
    + }
    +
    +
    + static long readVulong(InputStream in) throws IOException {
    + long result = 0;
    + long b;
    + int offset = 0;
    + do {
    + b = in.read();
    + if (b == -1) {
    + throw new EOFException("Reading Vulong past EOF");
    + }
    + result |= (0x7f & b) << offset;
    + offset += 7;
    + } while (b >= 0x80);
    + return result;
    + }
    +
    + static long readVslong(InputStream in) throws IOException {
    + long result = readVulong(in);
    + return (result >>> 1) ^ -(result & 1);
    + }
    +
    + static float readFloat(InputStream in) throws IOException {
    + int ser = in.read() | (in.read() << 8) | (in.read() << 16) |
    + (in.read() << 24);
    + return Float.intBitsToFloat(ser);
    + }
    +
    + static void writeFloat(OutputStream output, float value) throws IOException {
    + int ser = Float.floatToIntBits(value);
    + output.write(ser & 0xff);
    + output.write((ser >> 8) & 0xff);
    + output.write((ser >> 16) & 0xff);
    + output.write((ser >> 24) & 0xff);
    + }
    +
    + static double readDouble(InputStream in) throws IOException {
    + long ser = (long) in.read() |
    + ((long) in.read() << 8) |
    + ((long) in.read() << 16) |
    + ((long) in.read() << 24) |
    + ((long) in.read() << 32) |
    + ((long) in.read() << 40) |
    + ((long) in.read() << 48) |
    + ((long) in.read() << 56);
    + return Double.longBitsToDouble(ser);
    + }
    +
    + static void writeDouble(OutputStream output,
    + double value) throws IOException {
    + long ser = Double.doubleToLongBits(value);
    + output.write(((int) ser) & 0xff);
    + output.write(((int) (ser >> 8)) & 0xff);
    + output.write(((int) (ser >> 16)) & 0xff);
    + output.write(((int) (ser >> 24)) & 0xff);
    + output.write(((int) (ser >> 32)) & 0xff);
    + output.write(((int) (ser >> 40)) & 0xff);
    + output.write(((int) (ser >> 48)) & 0xff);
    + output.write(((int) (ser >> 56)) & 0xff);
    + }
    +}
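    The writeVslong/readVslong pair above uses the zigzag mapping, which folds
    negative numbers into small unsigned ones so that they stay short as base-128
    varints. A minimal standalone illustration (class name hypothetical):

      public class ZigZagSketch {
        static long encode(long value) {
          return (value << 1) ^ (value >> 63);   // 0->0, -1->1, 1->2, -2->3, 2->4, ...
        }

        static long decode(long encoded) {
          return (encoded >>> 1) ^ -(encoded & 1);
        }

        public static void main(String[] args) {
          for (long v : new long[]{0, -1, 1, -2, 2, Long.MIN_VALUE, Long.MAX_VALUE}) {
            long e = encode(v);
            System.out.println(v + " -> " + e + " -> " + decode(e));
          }
        }
      }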

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SnappyCodec.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,67 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.iq80.snappy.Snappy;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +
    +class SnappyCodec implements CompressionCodec {
    +
    + @Override
    + public boolean compress(ByteBuffer in, ByteBuffer out,
    + ByteBuffer overflow) throws IOException {
    + int inBytes = in.remaining();
    + // I should work on a patch for Snappy to support an overflow buffer
    + // to prevent the extra buffer copy.
    + byte[] compressed = new byte[Snappy.maxCompressedLength(inBytes)];
    + int outBytes =
    + Snappy.compress(in.array(), in.arrayOffset() + in.position(), inBytes,
    + compressed, 0);
    + if (outBytes < inBytes) {
    + int remaining = out.remaining();
    + if (remaining >= outBytes) {
    + System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
    + out.position(), outBytes);
    + out.position(out.position() + outBytes);
    + } else {
    + System.arraycopy(compressed, 0, out.array(), out.arrayOffset() +
    + out.position(), remaining);
    + out.position(out.limit());
    + System.arraycopy(compressed, remaining, overflow.array(),
    + overflow.arrayOffset(), outBytes - remaining);
    + overflow.position(outBytes - remaining);
    + }
    + return true;
    + } else {
    + return false;
    + }
    + }
    +
    + @Override
    + public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
    + int inOffset = in.position();
    + int uncompressLen =
    + Snappy.uncompress(in.array(), in.arrayOffset() + inOffset,
    + in.limit() - inOffset, out.array(), out.arrayOffset() + out.position());
    + out.position(uncompressLen + out.position());
    + out.flip();
    + }
    +}
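    The Snappy calls used by SnappyCodec above come from the org.iq80.snappy
    dependency added in ql/ivy.xml. A minimal round trip with the same three
    methods (class name and sample input are illustrative, and the caller is
    assumed to know the uncompressed size, as the ORC reader does):

      import org.iq80.snappy.Snappy;

      import java.util.Arrays;

      public class SnappySketch {
        public static void main(String[] args) {
          byte[] input = "hello hello hello hello hello".getBytes();

          // Compress into a buffer sized by the library's worst-case bound.
          byte[] compressed = new byte[Snappy.maxCompressedLength(input.length)];
          int compressedLength = Snappy.compress(input, 0, input.length, compressed, 0);

          // Decompress back into a buffer of the known uncompressed size.
          byte[] output = new byte[input.length];
          int uncompressedLength =
              Snappy.uncompress(compressed, 0, compressedLength, output, 0);

          System.out.println(compressedLength + " compressed bytes, round trip ok: " +
              Arrays.equals(input, Arrays.copyOf(output, uncompressedLength)));
        }
      }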

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StreamName.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,94 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * The name of a stream within a stripe.
    + */
    +class StreamName implements Comparable<StreamName> {
    + private final int column;
    + private final OrcProto.Stream.Kind kind;
    +
    + public static enum Area {
    + DATA, INDEX
    + }
    +
    + public StreamName(int column, OrcProto.Stream.Kind kind) {
    + this.column = column;
    + this.kind = kind;
    + }
    +
    + public boolean equals(Object obj) {
    + if (obj != null && obj instanceof StreamName) {
    + StreamName other = (StreamName) obj;
    + return other.column == column && other.kind == kind;
    + } else {
    + return false;
    + }
    + }
    +
    + @Override
    + public int compareTo(StreamName streamName) {
    + if (streamName == null) {
    + return -1;
    + }
    + Area area = getArea(kind);
    + Area otherArea = streamName.getArea(streamName.kind);
    + if (area != otherArea) {
    + return -area.compareTo(otherArea);
    + }
    + if (column != streamName.column) {
    + return column < streamName.column ? -1 : 1;
    + }
    + return kind.compareTo(streamName.kind);
    + }
    +
    + public int getColumn() {
    + return column;
    + }
    +
    + public OrcProto.Stream.Kind getKind() {
    + return kind;
    + }
    +
    + public Area getArea() {
    + return getArea(kind);
    + }
    +
    + public static Area getArea(OrcProto.Stream.Kind kind) {
    + switch (kind) {
    + case ROW_INDEX:
    + case DICTIONARY_COUNT:
    + return Area.INDEX;
    + default:
    + return Area.DATA;
    + }
    + }
    +
    + @Override
    + public String toString() {
    + return "Stream for column " + column + " kind " + kind;
    + }
    +
    + @Override
    + public int hashCode() {
    + return column * 101 + kind.getNumber();
    + }
    +}
    +

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringColumnStatistics.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,35 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Statistics for string columns.
    + */
    +public interface StringColumnStatistics extends ColumnStatistics {
    + /**
    + * Get the minimum string.
    + * @return the minimum
    + */
    + String getMinimum();
    +
    + /**
    + * Get the maximum string.
    + * @return the maximum
    + */
    + String getMaximum();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,176 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.io.Text;
    +
    +import java.io.IOException;
    +import java.io.OutputStream;
    +
    +/**
    + * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
    + * and an offset/length for each entry.
    + */
    +class StringRedBlackTree extends RedBlackTree {
    + private final DynamicByteArray byteArray = new DynamicByteArray();
    + private final DynamicIntArray keySizes = new DynamicIntArray();
    + private final Text newKey = new Text();
    +
    + public StringRedBlackTree() {
    + // PASS
    + }
    +
    + public StringRedBlackTree(int initialCapacity) {
    + super(initialCapacity);
    + }
    +
    + public int add(String value) {
    + newKey.set(value);
    + // if the key is new, add it to our byteArray and store the offset & length
    + if (add()) {
    + int len = newKey.getLength();
    + keySizes.add(byteArray.add(newKey.getBytes(), 0, len));
    + keySizes.add(len);
    + }
    + return lastAdd;
    + }
    +
    + @Override
    + protected int compareValue(int position) {
    + return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
    + keySizes.get(2 * position), keySizes.get(2 * position + 1));
    + }
    +
    + /**
    + * The information about each node.
    + */
    + public interface VisitorContext {
    + /**
    + * Get the position where the key was originally added.
    + * @return the number returned by add.
    + */
    + int getOriginalPosition();
    +
    + /**
    + * Write the bytes for the string to the given output stream.
    + * @param out the stream to write to.
    + * @throws IOException
    + */
    + void writeBytes(OutputStream out) throws IOException;
    +
    + /**
    + * Get the original string.
    + * @return the string
    + */
    + Text getText();
    +
    + /**
    + * Get the number of bytes.
    + * @return the string's length in bytes
    + */
    + int getLength();
    +
    + /**
    + * Get the count for this key.
    + * @return the number of times this key was added
    + */
    + int getCount();
    + }
    +
    + /**
    + * The interface for visitors.
    + */
    + public interface Visitor {
    + /**
    + * Called once for each node of the tree in sort order.
    + * @param context the information about each node
    + * @throws IOException
    + */
    + void visit(VisitorContext context) throws IOException;
    + }
    +
    + private class VisitorContextImpl implements VisitorContext {
    + private int originalPosition;
    + private final Text text = new Text();
    +
    + public int getOriginalPosition() {
    + return originalPosition;
    + }
    +
    + public Text getText() {
    + byteArray.setText(text, keySizes.get(originalPosition * 2), getLength());
    + return text;
    + }
    +
    + public void writeBytes(OutputStream out) throws IOException {
    + byteArray.write(out, keySizes.get(originalPosition * 2), getLength());
    + }
    +
    + public int getLength() {
    + return keySizes.get(originalPosition * 2 + 1);
    + }
    +
    + public int getCount() {
    + return StringRedBlackTree.this.getCount(originalPosition);
    + }
    + }
    +
    + private void recurse(int node, Visitor visitor, VisitorContextImpl context
    + ) throws IOException {
    + if (node != NULL) {
    + recurse(getLeft(node), visitor, context);
    + context.originalPosition = node;
    + visitor.visit(context);
    + recurse(getRight(node), visitor, context);
    + }
    + }
    +
    + /**
    + * Visit all of the nodes in the tree in sorted order.
    + * @param visitor the action to be applied to each node
    + * @throws IOException
    + */
    + public void visit(Visitor visitor) throws IOException {
    + recurse(root, visitor, new VisitorContextImpl());
    + }
    +
    + /**
    + * Reset the table to empty.
    + */
    + public void clear() {
    + super.clear();
    + byteArray.clear();
    + keySizes.clear();
    + }
    +
    + /**
    + * Get the size of the character data in the table.
    + * @return the bytes used by the table
    + */
    + public int getCharacterSize() {
    + return byteArray.size();
    + }
    +
    + /**
    + * Calculate the approximate size in memory.
    + * @return the number of bytes used in storing the tree.
    + */
    + public long getByteSize() {
    + return byteArray.size() + 5 * 4 * size();
    + }
    +}
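    The dictionary above is driven by a writer elsewhere in this patch, but the intended calling pattern is easy to sketch. In this hypothetical snippet (the class name DictionarySketch is not part of the patch, and it has to sit in the same package because StringRedBlackTree is package-private), add() hands back a stable id per distinct key and visit() walks the distinct keys in sorted order:

        package org.apache.hadoop.hive.ql.io.orc;

        import java.io.IOException;

        // Hypothetical driver, for illustration only.
        public class DictionarySketch {
          public static void main(String[] args) throws IOException {
            StringRedBlackTree dict = new StringRedBlackTree();
            // add() returns a stable id per distinct key; adding "hive" twice
            // yields the same id both times.
            int first = dict.add("hive");
            dict.add("orc");
            int again = dict.add("hive");
            System.out.println("same id: " + (first == again));

            // visit() walks the distinct keys in sorted order.
            dict.visit(new StringRedBlackTree.Visitor() {
              public void visit(StringRedBlackTree.VisitorContext context)
                  throws IOException {
                System.out.println(context.getText() + " added as id "
                    + context.getOriginalPosition());
              }
            });
          }
        }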

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StripeInformation.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,53 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Information about the stripes in an ORC file that is provided by the Reader.
    + */
    +public interface StripeInformation {
    + /**
    + * Get the byte offset of the start of the stripe.
    + * @return the bytes from the start of the file
    + */
    + long getOffset();
    +
    + /**
    + * Get the length of the stripe's indexes.
    + * @return the number of bytes in the index
    + */
    + long getIndexLength();
    +
    + /**
    + * Get the length of the stripe's data.
    + * @return the number of bytes in the stripe
    + */
    + long getDataLength();
    +
    + /**
    + * Get the length of the stripe's tail section, which contains its index.
    + * @return the number of bytes in the tail
    + */
    + long getFooterLength();
    +
    + /**
    + * Get the number of rows in the stripe.
    + * @return a count of the number of rows
    + */
    + long getNumberOfRows();
    +}
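    For orientation, the three lengths above describe consecutive sections of a stripe: index streams first, then data streams, then the stripe footer, so the stripe's end offset should be the sum of the three added to getOffset(). A tiny hypothetical helper (not part of the patch) spelling that out:

        package org.apache.hadoop.hive.ql.io.orc;

        // Illustrative helper, not part of this patch.
        final class StripeMath {
          // A stripe occupies its index, data, and footer sections back to back,
          // so its end offset is the sum of the three lengths past getOffset().
          static long stripeEnd(StripeInformation stripe) {
            return stripe.getOffset() + stripe.getIndexLength()
                + stripe.getDataLength() + stripe.getFooterLength();
          }
        }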

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,50 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +
    +/**
    + * The interface for writing ORC files.
    + */
    +public interface Writer {
    + /**
    + * Add arbitrary meta-data to the ORC file. This may be called at any point
    + * until the Writer is closed. If the same key is passed a second time, the
    + * second value will replace the first.
    + * @param key a key to label the data with.
    + * @param value the contents of the metadata.
    + */
    + void addUserMetadata(String key, ByteBuffer value);
    +
    + /**
    + * Add a row to the ORC file.
    + * @param row the row to add
    + * @throws IOException
    + */
    + void addRow(Object row) throws IOException;
    +
    + /**
    + * Flush all of the buffers and close the file. No methods on this writer
    + * should be called afterwards.
    + * @throws IOException
    + */
    + void close() throws IOException;
    +}
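    A minimal, hypothetical sketch of how the Writer contract is meant to be exercised. How the Writer itself is obtained is left out on purpose: this patch adds an OrcFile factory, but its exact signature is not shown in this hunk, so the sketch simply takes a Writer as a parameter.

        package org.apache.hadoop.hive.ql.io.orc;

        import java.io.IOException;
        import java.nio.ByteBuffer;
        import java.nio.charset.Charset;

        // Illustration only; the Writer would come from this patch's OrcFile factory.
        final class WriterSketch {
          static void writeRows(Writer writer, Iterable<Object> rows) throws IOException {
            // user metadata can be attached at any time before close()
            writer.addUserMetadata("created.by",
                ByteBuffer.wrap("example".getBytes(Charset.forName("UTF-8"))));
            for (Object row : rows) {
              writer.addRow(row);   // rows must match the writer's object inspector
            }
            writer.close();         // flushes buffers; no further calls allowed
          }
        }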
    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,395 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
    +import org.apache.hadoop.io.Writable;
    +
    +import java.io.DataInput;
    +import java.io.DataOutput;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Map;
    +
    +final class OrcStruct implements Writable {
    +
    + private final Object[] fields;
    +
    + OrcStruct(int children) {
    + fields = new Object[children];
    + }
    +
    + Object getFieldValue(int fieldIndex) {
    + return fields[fieldIndex];
    + }
    +
    + void setFieldValue(int fieldIndex, Object value) {
    + fields[fieldIndex] = value;
    + }
    +
    + @Override
    + public void write(DataOutput dataOutput) throws IOException {
    + throw new UnsupportedOperationException("write unsupported");
    + }
    +
    + @Override
    + public void readFields(DataInput dataInput) throws IOException {
    + throw new UnsupportedOperationException("readFields unsupported");
    + }
    +
    + @Override
    + public boolean equals(Object other) {
    + if (other == null || other.getClass() != OrcStruct.class) {
    + return false;
    + } else {
    + OrcStruct oth = (OrcStruct) other;
    + if (fields.length != oth.fields.length) {
    + return false;
    + }
    + for(int i=0; i < fields.length; ++i) {
    + if (fields[i] == null) {
    + if (oth.fields[i] != null) {
    + return false;
    + }
    + } else {
    + if (!fields[i].equals(oth.fields[i])) {
    + return false;
    + }
    + }
    + }
    + return true;
    + }
    + }
    +
    + @Override
    + public int hashCode() {
    + int result = fields.length;
    + for(Object field: fields) {
    + if (field != null) {
    + result ^= field.hashCode();
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + public String toString() {
    + StringBuilder buffer = new StringBuilder();
    + buffer.append("{");
    + for(int i=0; i < fields.length; ++i) {
    + if (i != 0) {
    + buffer.append(", ");
    + }
    + buffer.append(fields[i]);
    + }
    + buffer.append("}");
    + return buffer.toString();
    + }
    +
    + static class Field implements StructField {
    + private final String name;
    + private final ObjectInspector inspector;
    + private final int offset;
    +
    + Field(String name, ObjectInspector inspector, int offset) {
    + this.name = name;
    + this.inspector = inspector;
    + this.offset = offset;
    + }
    +
    + @Override
    + public String getFieldName() {
    + return name;
    + }
    +
    + @Override
    + public ObjectInspector getFieldObjectInspector() {
    + return inspector;
    + }
    +
    + @Override
    + public String getFieldComment() {
    + return null;
    + }
    + }
    +
    + static class OrcStructInspector extends StructObjectInspector {
    + private final List<StructField> fields;
    +
    + OrcStructInspector(StructTypeInfo info) {
    + ArrayList<String> fieldNames = info.getAllStructFieldNames();
    + ArrayList<TypeInfo> fieldTypes = info.getAllStructFieldTypeInfos();
    + fields = new ArrayList<StructField>(fieldNames.size());
    + for(int i=0; i < fieldNames.size(); ++i) {
    + fields.add(new Field(fieldNames.get(i),
    + createObjectInspector(fieldTypes.get(i)), i));
    + }
    + }
    +
    + OrcStructInspector(int columnId, List<OrcProto.Type> types) {
    + OrcProto.Type type = types.get(columnId);
    + int fieldCount = type.getSubtypesCount();
    + fields = new ArrayList<StructField>(fieldCount);
    + for(int i=0; i < fieldCount; ++i) {
    + int fieldType = type.getSubtypes(i);
    + fields.add(new Field(type.getFieldNames(i),
    + createObjectInspector(fieldType, types), i));
    + }
    + }
    +
    + @Override
    + public List<StructField> getAllStructFieldRefs() {
    + return fields;
    + }
    +
    + @Override
    + public StructField getStructFieldRef(String s) {
    + for(StructField field: fields) {
    + if (field.getFieldName().equals(s)) {
    + return field;
    + }
    + }
    + return null;
    + }
    +
    + @Override
    + public Object getStructFieldData(Object object, StructField field) {
    + return ((OrcStruct) object).fields[((Field) field).offset];
    + }
    +
    + @Override
    + public List<Object> getStructFieldsDataAsList(Object object) {
    + OrcStruct struct = (OrcStruct) object;
    + List<Object> result = new ArrayList<Object>(struct.fields.length);
    + for (Object child: struct.fields) {
    + result.add(child);
    + }
    + return result;
    + }
    +
    + @Override
    + public String getTypeName() {
    + StringBuilder buffer = new StringBuilder();
    + buffer.append("struct<");
    + for(int i=0; i < fields.size(); ++i) {
    + StructField field = fields.get(i);
    + if (i != 0) {
    + buffer.append(",");
    + }
    + buffer.append(field.getFieldName());
    + buffer.append(":");
    + buffer.append(field.getFieldObjectInspector().getTypeName());
    + }
    + buffer.append(">");
    + return buffer.toString();
    + }
    +
    + @Override
    + public Category getCategory() {
    + return Category.STRUCT;
    + }
    + }
    +
    + static class OrcMapObjectInspector implements MapObjectInspector {
    + private final ObjectInspector key;
    + private final ObjectInspector value;
    +
    + OrcMapObjectInspector(MapTypeInfo info) {
    + key = createObjectInspector(info.getMapKeyTypeInfo());
    + value = createObjectInspector(info.getMapValueTypeInfo());
    + }
    +
    + OrcMapObjectInspector(int columnId, List<OrcProto.Type> types) {
    + OrcProto.Type type = types.get(columnId);
    + key = createObjectInspector(type.getSubtypes(0), types);
    + value = createObjectInspector(type.getSubtypes(1), types);
    + }
    +
    + @Override
    + public ObjectInspector getMapKeyObjectInspector() {
    + return key;
    + }
    +
    + @Override
    + public ObjectInspector getMapValueObjectInspector() {
    + return value;
    + }
    +
    + @Override
    + public Object getMapValueElement(Object map, Object key) {
    + return ((Map) map).get(key);
    + }
    +
    + @Override
    + @SuppressWarnings("unchecked")
    + public Map<Object, Object> getMap(Object map) {
    + return (Map) map;
    + }
    +
    + @Override
    + public int getMapSize(Object map) {
    + return ((Map) map).size();
    + }
    +
    + @Override
    + public String getTypeName() {
    + return "map<" + key.getTypeName() + "," + value.getTypeName() + ">";
    + }
    +
    + @Override
    + public Category getCategory() {
    + return Category.MAP;
    + }
    + }
    +
    + static class OrcListObjectInspector implements ListObjectInspector {
    + private final ObjectInspector child;
    +
    + OrcListObjectInspector(ListTypeInfo info) {
    + child = createObjectInspector(info.getListElementTypeInfo());
    + }
    +
    + OrcListObjectInspector(int columnId, List<OrcProto.Type> types) {
    + OrcProto.Type type = types.get(columnId);
    + child = createObjectInspector(type.getSubtypes(0), types);
    + }
    +
    + @Override
    + public ObjectInspector getListElementObjectInspector() {
    + return child;
    + }
    +
    + @Override
    + public Object getListElement(Object list, int i) {
    + return ((List) list).get(i);
    + }
    +
    + @Override
    + public int getListLength(Object list) {
    + return ((List) list).size();
    + }
    +
    + @Override
    + @SuppressWarnings("unchecked")
    + public List<?> getList(Object list) {
    + return (List) list;
    + }
    +
    + @Override
    + public String getTypeName() {
    + return "array<" + child.getTypeName() + ">";
    + }
    +
    + @Override
    + public Category getCategory() {
    + return Category.LIST;
    + }
    + }
    +
    + static ObjectInspector createObjectInspector(TypeInfo info) {
    + switch (info.getCategory()) {
    + case PRIMITIVE:
    + switch (((PrimitiveTypeInfo) info).getPrimitiveCategory()) {
    + case FLOAT:
    + return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    + case DOUBLE:
    + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    + case BOOLEAN:
    + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    + case BYTE:
    + return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
    + case SHORT:
    + return PrimitiveObjectInspectorFactory.writableShortObjectInspector;
    + case INT:
    + return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    + case LONG:
    + return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    + case BINARY:
    + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    + case STRING:
    + return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    + case TIMESTAMP:
    + return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
    + default:
    + throw new IllegalArgumentException("Unknown primitive type " +
    + ((PrimitiveTypeInfo) info).getPrimitiveCategory());
    + }
    + case STRUCT:
    + return new OrcStructInspector((StructTypeInfo) info);
    + case UNION:
    + return new OrcUnion.OrcUnionObjectInspector((UnionTypeInfo) info);
    + case MAP:
    + return new OrcMapObjectInspector((MapTypeInfo) info);
    + case LIST:
    + return new OrcListObjectInspector((ListTypeInfo) info);
    + default:
    + throw new IllegalArgumentException("Unknown type " +
    + info.getCategory());
    + }
    + }
    +
    + static ObjectInspector createObjectInspector(int columnId,
    + List<OrcProto.Type> types){
    + OrcProto.Type type = types.get(columnId);
    + switch (type.getKind()) {
    + case FLOAT:
    + return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    + case DOUBLE:
    + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    + case BOOLEAN:
    + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    + case BYTE:
    + return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
    + case SHORT:
    + return PrimitiveObjectInspectorFactory.writableShortObjectInspector;
    + case INT:
    + return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    + case LONG:
    + return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    + case BINARY:
    + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    + case STRING:
    + return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    + case TIMESTAMP:
    + return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
    + case STRUCT:
    + return new OrcStructInspector(columnId, types);
    + case UNION:
    + return new OrcUnion.OrcUnionObjectInspector(columnId, types);
    + case MAP:
    + return new OrcMapObjectInspector(columnId, types);
    + case LIST:
    + return new OrcListObjectInspector(columnId, types);
    + default:
    + throw new UnsupportedOperationException("Unknown type " +
    + type.getKind());
    + }
    + }
    +}
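    To make the two createObjectInspector overloads concrete, here is a hypothetical, package-local sketch that builds an inspector from a Hive type string and pulls a field back out of an OrcStruct. TypeInfoUtils.getTypeInfoFromTypeString is an existing serde2 utility; using it here is just one convenient way to obtain a StructTypeInfo.

        package org.apache.hadoop.hive.ql.io.orc;

        import org.apache.hadoop.hive.serde2.objectinspector.StructField;
        import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
        import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
        import org.apache.hadoop.io.IntWritable;
        import org.apache.hadoop.io.Text;

        // Illustration only.
        final class StructInspectorSketch {
          public static void main(String[] args) {
            // build an inspector for struct<name:string,age:int>
            StructObjectInspector oi = (StructObjectInspector)
                OrcStruct.createObjectInspector(
                    TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>"));

            // fill an OrcStruct positionally, matching the declared field order
            OrcStruct row = new OrcStruct(2);
            row.setFieldValue(0, new Text("alice"));
            row.setFieldValue(1, new IntWritable(7));

            StructField age = oi.getStructFieldRef("age");
            System.out.println(oi.getStructFieldData(row, age));  // prints 7
          }
        }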

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUnion.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,138 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
    +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
    +
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +/**
    + * An in-memory representation of a union type.
    + */
    +final class OrcUnion implements UnionObject {
    + private byte tag;
    + private Object object;
    +
    + void set(byte tag, Object object) {
    + this.tag = tag;
    + this.object = object;
    + }
    +
    + @Override
    + public byte getTag() {
    + return tag;
    + }
    +
    + @Override
    + public Object getObject() {
    + return object;
    + }
    +
    + @Override
    + public boolean equals(Object other) {
    + if (other == null || other.getClass() != OrcUnion.class) {
    + return false;
    + }
    + OrcUnion oth = (OrcUnion) other;
    + if (tag != oth.tag) {
    + return false;
    + } else if (object == null) {
    + return oth.object == null;
    + } else {
    + return object.equals(oth.object);
    + }
    + }
    +
    + @Override
    + public int hashCode() {
    + int result = tag;
    + if (object != null) {
    + result ^= object.hashCode();
    + }
    + return result;
    + }
    +
    + @Override
    + public String toString() {
    + return "union(" + Integer.toString(tag & 0xff) + ", " + object.toString() +
    + ")";
    + }
    +
    + static class OrcUnionObjectInspector implements UnionObjectInspector {
    + private final List<ObjectInspector> children;
    +
    + OrcUnionObjectInspector(int columnId,
    + List<OrcProto.Type> types) {
    + OrcProto.Type type = types.get(columnId);
    + children = new ArrayList<ObjectInspector>(type.getSubtypesCount());
    + for(int i=0; i < type.getSubtypesCount(); ++i) {
    + children.add(OrcStruct.createObjectInspector(type.getSubtypes(i),
    + types));
    + }
    + }
    +
    + OrcUnionObjectInspector(UnionTypeInfo info) {
    + List<TypeInfo> unionChildren = info.getAllUnionObjectTypeInfos();
    + this.children = new ArrayList<ObjectInspector>(unionChildren.size());
    + for(TypeInfo child: info.getAllUnionObjectTypeInfos()) {
    + this.children.add(OrcStruct.createObjectInspector(child));
    + }
    + }
    +
    + @Override
    + public List<ObjectInspector> getObjectInspectors() {
    + return children;
    + }
    +
    + @Override
    + public byte getTag(Object obj) {
    + return ((OrcUnion) obj).tag;
    + }
    +
    + @Override
    + public Object getField(Object obj) {
    + return ((OrcUnion) obj).object;
    + }
    +
    + @Override
    + public String getTypeName() {
    + StringBuilder builder = new StringBuilder("union{");
    + boolean first = true;
    + for(ObjectInspector child: children) {
    + if (first) {
    + first = false;
    + } else {
    + builder.append(", ");
    + }
    + builder.append(child.getTypeName());
    + }
    + builder.append("}");
    + return builder.toString();
    + }
    +
    + @Override
    + public Category getCategory() {
    + return Category.UNION;
    + }
    + }
    +}
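    A quick, hypothetical sketch of the union value's behaviour: the tag says which of the union's children the current value belongs to, and set() lets a reader reuse one OrcUnion object across rows.

        package org.apache.hadoop.hive.ql.io.orc;

        import org.apache.hadoop.io.IntWritable;
        import org.apache.hadoop.io.Text;

        // Illustration only.
        final class UnionSketch {
          public static void main(String[] args) {
            OrcUnion u = new OrcUnion();
            u.set((byte) 0, new IntWritable(42));   // child 0 holds an int
            System.out.println(u);                  // union(0, 42)

            u.set((byte) 1, new Text("hi"));        // reuse the object for child 1
            System.out.println(u.getTag() + " " + u.getObject());
          }
        }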

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,228 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +
    +class OutStream extends PositionedOutputStream {
    +
    + interface OutputReceiver {
    + void output(ByteBuffer buffer) throws IOException;
    + }
    +
    + static final int HEADER_SIZE = 3;
    + private final String name;
    + private final OutputReceiver receiver;
    + private ByteBuffer compressed = null;
    + private ByteBuffer overflow = null;
    + private ByteBuffer current;
    + private final int bufferSize;
    + private final CompressionCodec codec;
    + private long compressedBytes = 0;
    + private long uncompressedBytes = 0;
    +
    + OutStream(String name,
    + int bufferSize,
    + CompressionCodec codec,
    + OutputReceiver receiver) throws IOException {
    + this.name = name;
    + this.bufferSize = bufferSize;
    + this.codec = codec;
    + this.receiver = receiver;
    + getNewInputBuffer();
    + }
    +
    + public void clear() throws IOException {
    + current.position(codec == null ? 0 : HEADER_SIZE);
    + if (compressed != null) {
    + compressed.clear();
    + }
    + if (overflow != null) {
    + overflow.clear();
    + }
    + }
    +
    + /**
    + * Write the length of the compressed bytes. Life is much easier if the
    + * header is constant length, so just use 3 bytes. Considering most of the
    + * codecs want between 32k (snappy) and 256k (lzo, zlib), 3 bytes should
    + * be plenty. We also use the low bit for whether it is the original or
    + * compressed bytes.
    + * @param buffer the buffer to write the header to
    + * @param position the position in the buffer to write at
    + * @param val the size in the file
    + * @param original is it uncompressed
    + */
    + private static void writeHeader(ByteBuffer buffer,
    + int position,
    + int val,
    + boolean original) {
    + buffer.put(position, (byte) ((val << 1) + (original ? 1 : 0)));
    + buffer.put(position + 1, (byte) (val >> 7));
    + buffer.put(position + 2, (byte) (val >> 15));
    + }
    +
    + private void getNewInputBuffer() throws IOException {
    + if (codec == null) {
    + current = ByteBuffer.allocate(bufferSize);
    + } else {
    + current = ByteBuffer.allocate(bufferSize + HEADER_SIZE);
    + writeHeader(current, 0, bufferSize, true);
    + current.position(HEADER_SIZE);
    + }
    + }
    +
    + private ByteBuffer getNewOutputBuffer() throws IOException {
    + return ByteBuffer.allocate(bufferSize +
    + (codec == null ? 0 : HEADER_SIZE));
    + }
    +
    + private void flip() throws IOException {
    + current.limit(current.position());
    + current.position(codec == null ? 0 : HEADER_SIZE);
    + }
    +
    + @Override
    + public void write(int i) throws IOException {
    + if (current.remaining() < 1) {
    + spill();
    + }
    + uncompressedBytes += 1;
    + current.put((byte) i);
    + }
    +
    + @Override
    + public void write(byte[] bytes, int offset, int length) throws IOException {
    + int remaining = Math.min(current.remaining(), length);
    + current.put(bytes, offset, remaining);
    + uncompressedBytes += remaining;
    + length -= remaining;
    + while (length != 0) {
    + spill();
    + offset += remaining;
    + remaining = Math.min(current.remaining(), length);
    + current.put(bytes, offset, remaining);
    + uncompressedBytes += remaining;
    + length -= remaining;
    + }
    + }
    +
    + private void spill() throws java.io.IOException {
    + // if there isn't anything in the current buffer, don't spill
    + if (current.position() == (codec == null ? 0 : HEADER_SIZE)) {
    + return;
    + }
    + flip();
    + if (codec == null) {
    + receiver.output(current);
    + getNewInputBuffer();
    + } else {
    + if (compressed == null) {
    + compressed = getNewOutputBuffer();
    + } else if (overflow == null) {
    + overflow = getNewOutputBuffer();
    + }
    + int sizePosn = compressed.position();
    + compressed.position(compressed.position() + HEADER_SIZE);
    + if (codec.compress(current, compressed, overflow)) {
    + uncompressedBytes = 0;
    + // move position back to after the header
    + current.position(HEADER_SIZE);
    + current.limit(current.capacity());
    + // find the total bytes in the chunk
    + int totalBytes = compressed.position() - sizePosn - HEADER_SIZE;
    + if (overflow != null) {
    + totalBytes += overflow.position();
    + }
    + compressedBytes += totalBytes + HEADER_SIZE;
    + writeHeader(compressed, sizePosn, totalBytes, false);
    + // if we have less than the next header left, spill it.
    + if (compressed.remaining() < HEADER_SIZE) {
    + compressed.flip();
    + receiver.output(compressed);
    + compressed = overflow;
    + overflow = null;
    + }
    + } else {
    + compressedBytes += uncompressedBytes + HEADER_SIZE;
    + uncompressedBytes = 0;
    + // we are using the original, but need to spill the current
    + // compressed buffer first. So back up to where we started,
    + // flip it and add it to done.
    + if (sizePosn != 0) {
    + compressed.position(sizePosn);
    + compressed.flip();
    + receiver.output(compressed);
    + compressed = null;
    + // if we have an overflow, clear it and make it the new compress
    + // buffer
    + if (overflow != null) {
    + overflow.clear();
    + compressed = overflow;
    + overflow = null;
    + }
    + } else {
    + compressed.clear();
    + if (overflow != null) {
    + overflow.clear();
    + }
    + }
    +
    + // now add the current buffer into the done list and get a new one.
    + current.position(0);
    + // update the header with the current length
    + writeHeader(current, 0, current.limit() - HEADER_SIZE, true);
    + receiver.output(current);
    + getNewInputBuffer();
    + }
    + }
    + }
    +
    + void getPosition(PositionRecorder recorder) throws IOException {
    + if (codec == null) {
    + recorder.addPosition(uncompressedBytes);
    + } else {
    + recorder.addPosition(compressedBytes);
    + recorder.addPosition(uncompressedBytes);
    + }
    + }
    +
    + @Override
    + public void flush() throws IOException {
    + spill();
    + if (compressed != null && compressed.position() != 0) {
    + compressed.flip();
    + receiver.output(compressed);
    + compressed = null;
    + }
    + uncompressedBytes = 0;
    + compressedBytes = 0;
    + }
    +
    + @Override
    + public String toString() {
    + return name;
    + }
    +
    + @Override
    + public long getSize() {
    + return uncompressedBytes + compressedBytes;
    + }
    +}
    +
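    The 3-byte chunk header written by writeHeader above packs a 23-bit length and a 1-bit is-original flag, low byte first. The following stand-alone snippet (not part of the patch) mirrors that encoding and then inverts it, to show how the fields round-trip:

        import java.nio.ByteBuffer;

        // Stand-alone illustration of the 3-byte chunk header used by OutStream.
        public final class HeaderSketch {
          // mirrors OutStream.writeHeader: low bit = "original" flag,
          // remaining 23 bits = chunk length, little-endian
          static void writeHeader(ByteBuffer buf, int pos, int val, boolean original) {
            buf.put(pos, (byte) ((val << 1) + (original ? 1 : 0)));
            buf.put(pos + 1, (byte) (val >> 7));
            buf.put(pos + 2, (byte) (val >> 15));
          }

          public static void main(String[] args) {
            ByteBuffer buf = ByteBuffer.allocate(3);
            writeHeader(buf, 0, 100000, false);     // a 100,000 byte compressed chunk

            int b0 = buf.get(0) & 0xff, b1 = buf.get(1) & 0xff, b2 = buf.get(2) & 0xff;
            boolean original = (b0 & 1) != 0;
            int length = (b0 >>> 1) | (b1 << 7) | (b2 << 15);
            System.out.println(length + " original=" + original);  // 100000 original=false
          }
        }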

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionProvider.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,26 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * An interface used for seeking to a row index.
    + */
    +interface PositionProvider {
    + long getNext();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionRecorder.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,25 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * An interface for recording positions in a stream.
    + */
    +interface PositionRecorder {
    + void addPosition(long offset);
    +}
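    PositionRecorder, together with the getPosition hooks on the output streams, is how the writer captures the bookmarks that later allow seeking. A minimal, hypothetical recorder that just collects the offsets (the real recorder in this patch presumably feeds them into the row index instead):

        package org.apache.hadoop.hive.ql.io.orc;

        import java.util.ArrayList;
        import java.util.List;

        // Illustration only: a PositionRecorder that simply remembers every offset.
        final class ListPositionRecorder implements PositionRecorder {
          private final List<Long> positions = new ArrayList<Long>();

          @Override
          public void addPosition(long offset) {
            positions.add(offset);
          }

          List<Long> getPositions() {
            return positions;
          }
        }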

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/PositionedOutputStream.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,26 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.io.OutputStream;
    +
    +abstract class PositionedOutputStream extends OutputStream {
    + abstract void getPosition(PositionRecorder recorder) throws IOException;
    + abstract long getSize();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,125 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.util.List;
    +
    +/**
    + * The interface for reading ORC files.
    + *
    + * One Reader can support multiple concurrent RecordReaders.
    + */
    +public interface Reader {
    +
    + /**
    + * Get the number of rows in the file.
    + * @return the number of rows
    + */
    + long getNumberOfRows();
    +
    + /**
    + * Get the user metadata keys.
    + * @return the set of metadata keys
    + */
    + Iterable<String> getMetadataKeys();
    +
    + /**
    + * Get a user metadata value.
    + * @param key a key given by the user
    + * @return the bytes associated with the given key
    + */
    + ByteBuffer getMetadataValue(String key);
    +
    + /**
    + * Get the compression kind.
    + * @return the kind of compression in the file
    + */
    + CompressionKind getCompression();
    +
    + /**
    + * Get the buffer size for the compression.
    + * @return number of bytes to buffer for the compression codec.
    + */
    + int getCompressionSize();
    +
    + /**
    + * Get the number of rows per entry in the row index.
    + * @return the number of rows per entry in the row index, or 0 if there
    + * is no row index.
    + */
    + int getRowIndexStride();
    +
    + /**
    + * Get the list of stripes.
    + * @return the information about the stripes in order
    + */
    + Iterable<StripeInformation> getStripes();
    +
    + /**
    + * Get the object inspector for looking at the objects.
    + * @return an object inspector for each row returned
    + */
    + ObjectInspector getObjectInspector();
    +
    + /**
    + * Get the length of the file.
    + * @return the number of bytes in the file
    + */
    + long getContentLength();
    +
    + /**
    + * Get the statistics about the columns in the file.
    + * @return the information about the column
    + */
    + ColumnStatistics[] getStatistics();
    +
    + /**
    + * Get the list of types contained in the file. The root type is the first
    + * type in the list.
    + * @return the list of flattened types
    + */
    + List<OrcProto.Type> getTypes();
    +
    + /**
    + * Create a RecordReader that will scan the entire file.
    + * @param include true for each column that should be included
    + * @return A new RecordReader
    + * @throws IOException
    + */
    + RecordReader rows(boolean[] include) throws IOException;
    +
    + /**
    + * Create a RecordReader that will start reading at the first stripe after
    + * offset up to the stripe that starts at offset + length. This is intended
    + * to work with MapReduce's FileInputFormat where divisions are picked
    + * blindly, but they must cover all of the rows.
    + * @param offset a byte offset in the file
    + * @param length a number of bytes in the file
    + * @param include true for each column that should be included
    + * @return a new RecordReader that will read the specified rows.
    + * @throws IOException
    + */
    + RecordReader rows(long offset, long length,
    + boolean[] include) throws IOException;
    +
    +}
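    A hypothetical end-to-end sketch of the Reader contract. The factory call is an assumption: this patch adds an OrcFile class, but its exact createReader signature is not shown in this hunk, so treat OrcFile.createReader(fs, path) as illustrative.

        package org.apache.hadoop.hive.ql.io.orc;

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        import java.io.IOException;

        // Illustration only; assumes an OrcFile.createReader(fs, path) factory
        // is added elsewhere in this patch.
        final class ReaderSketch {
          public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            Path path = new Path(args[0]);
            FileSystem fs = path.getFileSystem(conf);

            Reader reader = OrcFile.createReader(fs, path);
            System.out.println("rows: " + reader.getNumberOfRows());
            System.out.println("compression: " + reader.getCompression()
                + " (" + reader.getCompressionSize() + " byte buffers)");
            for (String key : reader.getMetadataKeys()) {
              System.out.println("user metadata: " + key);
            }
            for (StripeInformation stripe : reader.getStripes()) {
              System.out.println(stripe);    // offset, data, rows, tail, index
            }
          }
        }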

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,247 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import com.google.protobuf.CodedInputStream;
    +import org.apache.hadoop.fs.FSDataInputStream;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.nio.ByteBuffer;
    +import java.util.ArrayList;
    +import java.util.Iterator;
    +import java.util.List;
    +
    +final class ReaderImpl implements Reader {
    +
    + private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
    +
    + private final FileSystem fileSystem;
    + private final Path path;
    + private final CompressionKind compressionKind;
    + private final CompressionCodec codec;
    + private final int bufferSize;
    + private final OrcProto.Footer footer;
    + private final ObjectInspector inspector;
    +
    + private static class StripeInformationImpl
    + implements StripeInformation {
    + private final OrcProto.StripeInformation stripe;
    +
    + StripeInformationImpl(OrcProto.StripeInformation stripe) {
    + this.stripe = stripe;
    + }
    +
    + @Override
    + public long getOffset() {
    + return stripe.getOffset();
    + }
    +
    + @Override
    + public long getDataLength() {
    + return stripe.getDataLength();
    + }
    +
    + @Override
    + public long getFooterLength() {
    + return stripe.getFooterLength();
    + }
    +
    + @Override
    + public long getIndexLength() {
    + return stripe.getIndexLength();
    + }
    +
    + @Override
    + public long getNumberOfRows() {
    + return stripe.getNumberOfRows();
    + }
    +
    + @Override
    + public String toString() {
    + return "offset: " + getOffset() + " data: " + getDataLength() +
    + " rows: " + getNumberOfRows() + " tail: " + getFooterLength() +
    + " index: " + getIndexLength();
    + }
    + }
    +
    + @Override
    + public long getNumberOfRows() {
    + return footer.getNumberOfRows();
    + }
    +
    + @Override
    + public Iterable<String> getMetadataKeys() {
    + List<String> result = new ArrayList<String>();
    + for(OrcProto.UserMetadataItem item: footer.getMetadataList()) {
    + result.add(item.getName());
    + }
    + return result;
    + }
    +
    + @Override
    + public ByteBuffer getMetadataValue(String key) {
    + for(OrcProto.UserMetadataItem item: footer.getMetadataList()) {
    + if (item.hasName() && item.getName().equals(key)) {
    + return item.getValue().asReadOnlyByteBuffer();
    + }
    + }
    + throw new IllegalArgumentException("Can't find user metadata " + key);
    + }
    +
    + @Override
    + public CompressionKind getCompression() {
    + return compressionKind;
    + }
    +
    + @Override
    + public int getCompressionSize() {
    + return bufferSize;
    + }
    +
    + @Override
    + public Iterable<StripeInformation> getStripes() {
    + return new Iterable<org.apache.hadoop.hive.ql.io.orc.StripeInformation>(){
    +
    + @Override
    + public Iterator<org.apache.hadoop.hive.ql.io.orc.StripeInformation> iterator() {
    + return new Iterator<org.apache.hadoop.hive.ql.io.orc.StripeInformation>(){
    + private final Iterator<OrcProto.StripeInformation> inner =
    + footer.getStripesList().iterator();
    +
    + @Override
    + public boolean hasNext() {
    + return inner.hasNext();
    + }
    +
    + @Override
    + public org.apache.hadoop.hive.ql.io.orc.StripeInformation next() {
    + return new StripeInformationImpl(inner.next());
    + }
    +
    + @Override
    + public void remove() {
    + throw new UnsupportedOperationException("remove unsupported");
    + }
    + };
    + }
    + };
    + }
    +
    + @Override
    + public ObjectInspector getObjectInspector() {
    + return inspector;
    + }
    +
    + @Override
    + public long getContentLength() {
    + return footer.getContentLength();
    + }
    +
    + @Override
    + public List<OrcProto.Type> getTypes() {
    + return footer.getTypesList();
    + }
    +
    + @Override
    + public int getRowIndexStride() {
    + return footer.getRowIndexStride();
    + }
    +
    + @Override
    + public ColumnStatistics[] getStatistics() {
    + ColumnStatistics[] result = new ColumnStatistics[footer.getTypesCount()];
    + for(int i=0; i < result.length; ++i) {
    + result[i] = ColumnStatisticsImpl.deserialize(footer.getStatistics(i));
    + }
    + return result;
    + }
    +
    + ReaderImpl(FileSystem fs, Path path) throws IOException {
    + this.fileSystem = fs;
    + this.path = path;
    + FSDataInputStream file = fs.open(path);
    + long size = fs.getFileStatus(path).getLen();
    + int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    + file.seek(size - readSize);
    + ByteBuffer buffer = ByteBuffer.allocate(readSize);
    + file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
    + buffer.remaining());
    + int psLen = buffer.get(readSize - 1);
    + int psOffset = readSize - 1 - psLen;
    + CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
    + buffer.arrayOffset() + psOffset, psLen);
    + OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
    + int footerSize = (int) ps.getFooterLength();
    + bufferSize = (int) ps.getCompressionBlockSize();
    + switch (ps.getCompression()) {
    + case NONE:
    + compressionKind = CompressionKind.NONE;
    + break;
    + case ZLIB:
    + compressionKind = CompressionKind.ZLIB;
    + break;
    + case SNAPPY:
    + compressionKind = CompressionKind.SNAPPY;
    + break;
    + case LZO:
    + compressionKind = CompressionKind.LZO;
    + break;
    + default:
    + throw new IllegalArgumentException("Unknown compression");
    + }
    + codec = WriterImpl.createCodec(compressionKind);
    + int extra = Math.max(0, psLen + 1 + footerSize - readSize);
    + if (extra > 0) {
    + file.seek(size - readSize - extra);
    + ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
    + file.readFully(extraBuf.array(),
    + extraBuf.arrayOffset() + extraBuf.position(), extra);
    + extraBuf.position(extra);
    + extraBuf.put(buffer);
    + buffer = extraBuf;
    + buffer.position(0);
    + buffer.limit(footerSize);
    + } else {
    + buffer.position(psOffset - footerSize);
    + buffer.limit(psOffset);
    + }
    + InputStream instream = InStream.create("footer", buffer, codec, bufferSize);
    + footer = OrcProto.Footer.parseFrom(instream);
    + inspector = OrcStruct.createObjectInspector(0, footer.getTypesList());
    + file.close();
    + }
    +
    + @Override
    + public RecordReader rows(boolean[] include) throws IOException {
    + return rows(0, Long.MAX_VALUE, include);
    + }
    +
    + @Override
    + public RecordReader rows(long offset, long length, boolean[] include
    + ) throws IOException {
    + return new RecordReaderImpl(this.getStripes(), fileSystem, path, offset,
    + length, footer.getTypesList(), codec, bufferSize,
    + include, footer.getRowIndexStride());
    + }
    +
    +}
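    The constructor above reads the tail of the file at most twice: a first speculative read of up to 16 KB, then, if the footer turns out to be larger than that read covered, an "extra" read further back. A small stand-alone example with made-up numbers showing the arithmetic:

        // Illustration (made-up numbers) of the tail-read arithmetic in ReaderImpl:
        // how much extra must be read when the footer overflows the first 16 KB.
        public final class TailReadSketch {
          public static void main(String[] args) {
            long size = 1000000;                             // file length in bytes
            int readSize = (int) Math.min(size, 16 * 1024);  // first speculative read
            int psLen = 20;                                  // last byte of the file
            int footerSize = 30000;                          // from the PostScript
            int extra = Math.max(0, psLen + 1 + footerSize - readSize);
            System.out.println("extra bytes to read: " + extra);        // 13637
            System.out.println("second seek to: " + (size - readSize - extra));
          }
        }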

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,66 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +
    +/**
    + * A row-by-row iterator for ORC files.
    + */
    +public interface RecordReader {
    + /**
    + * Does the reader have more rows available?
    + * @return true if there are more rows
    + * @throws java.io.IOException
    + */
    + boolean hasNext() throws IOException;
    +
    + /**
    + * Read the next row.
    + * @param previous a row object that can be reused by the reader
    + * @return the row that was read
    + * @throws java.io.IOException
    + */
    + Object next(Object previous) throws IOException;
    +
    + /**
    + * Get the row number of the row that will be returned by the following
    + * call to next().
    + * @return the row number from 0 to the number of rows in the file
    + * @throws java.io.IOException
    + */
    + long getRowNumber() throws IOException;
    +
    + /**
    + * Get the progress of the reader through the rows.
    + * @return a fraction between 0.0 and 1.0 of rows read
    + * @throws java.io.IOException
    + */
    + float getProgress() throws IOException;
    +
    + /**
    + * Release the resources associated with the given reader.
    + * @throws java.io.IOException
    + */
    + void close() throws IOException;
    +
    + /**
    + * Seek to a particular row number.
    + */
    + void seekToRow(long rowCount) throws IOException;
    +}
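    The intended iteration idiom for this interface, as a hypothetical sketch. The RecordReader would come from Reader.rows(...); the previously returned object is passed back into next() so the reader can reuse it rather than allocate a new row each time:

        package org.apache.hadoop.hive.ql.io.orc;

        import java.io.IOException;

        // Illustration only.
        final class RowLoopSketch {
          static long countRows(RecordReader rows) throws IOException {
            long count = 0;
            Object reused = null;
            while (rows.hasNext()) {
              reused = rows.next(reused);   // the same object may be refilled each call
              ++count;
            }
            rows.close();
            return count;
          }
        }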

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,1238 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.fs.FSDataInputStream;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.io.ByteWritable;
    +import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    +import org.apache.hadoop.hive.serde2.io.ShortWritable;
    +import org.apache.hadoop.io.BooleanWritable;
    +import org.apache.hadoop.io.BytesWritable;
    +import org.apache.hadoop.io.FloatWritable;
    +import org.apache.hadoop.io.IntWritable;
    +import org.apache.hadoop.io.LongWritable;
    +import org.apache.hadoop.io.Text;
    +
    +import java.io.EOFException;
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.sql.Timestamp;
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +
    +class RecordReaderImpl implements RecordReader {
    + private final FSDataInputStream file;
    + private final long firstRow;
    + private final List<StripeInformation> stripes =
    + new ArrayList<StripeInformation>();
    + private OrcProto.StripeFooter stripeFooter;
    + private final long totalRowCount;
    + private final CompressionCodec codec;
    + private final int bufferSize;
    + private final boolean[] included;
    + private final long rowIndexStride;
    + private long rowInStripe = 0;
    + private int currentStripe = 0;
    + private long rowBaseInStripe = 0;
    + private long rowCountInStripe = 0;
    + private final Map<StreamName, InStream> streams =
    + new HashMap<StreamName, InStream>();
    + private final TreeReader reader;
    + private final OrcProto.RowIndex[] indexes;
    +
    + RecordReaderImpl(Iterable<StripeInformation> stripes,
    + FileSystem fileSystem,
    + Path path,
    + long offset, long length,
    + List<OrcProto.Type> types,
    + CompressionCodec codec,
    + int bufferSize,
    + boolean[] included,
    + long strideRate
    + ) throws IOException {
    + this.file = fileSystem.open(path);
    + this.codec = codec;
    + this.bufferSize = bufferSize;
    + this.included = included;
    + long rows = 0;
    + long skippedRows = 0;
    + for(StripeInformation stripe: stripes) {
    + long stripeStart = stripe.getOffset();
    + if (offset > stripeStart) {
    + skippedRows += stripe.getNumberOfRows();
    + } else if (stripeStart < offset + length) {
    + this.stripes.add(stripe);
    + rows += stripe.getNumberOfRows();
    + }
    + }
    + firstRow = skippedRows;
    + totalRowCount = rows;
    + reader = createTreeReader(0, types, included);
    + indexes = new OrcProto.RowIndex[types.size()];
    + rowIndexStride = strideRate;
    + if (this.stripes.size() > 0) {
    + readStripe();
    + }
    + }
    +
    + private static final class PositionProviderImpl implements PositionProvider {
    + private final OrcProto.RowIndexEntry entry;
    + private int index = 0;
    +
    + PositionProviderImpl(OrcProto.RowIndexEntry entry) {
    + this.entry = entry;
    + }
    +
    + @Override
    + public long getNext() {
    + return entry.getPositions(index++);
    + }
    + }
    +
    + private abstract static class TreeReader {
    + protected final int columnId;
    + private BitFieldReader present = null;
    + protected boolean valuePresent = false;
    +
    + TreeReader(int columnId) {
    + this.columnId = columnId;
    + }
    +
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encoding
    + ) throws IOException {
    + InStream in = streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.PRESENT));
    + if (in == null) {
    + present = null;
    + valuePresent = true;
    + } else {
    + present = new BitFieldReader(in, 1);
    + }
    + }
    +
    + /**
    + * Seek to the given position.
    + * @param index the indexes loaded from the file
    + * @throws IOException
    + */
    + void seek(PositionProvider[] index) throws IOException {
    + if (present != null) {
    + present.seek(index[columnId]);
    + }
    + }
    +
    + protected long countNonNulls(long rows) throws IOException {
    + if (present != null) {
    + long result = 0;
    + for(long c=0; c < rows; ++c) {
    + if (present.next() == 1) {
    + result += 1;
    + }
    + }
    + return result;
    + } else {
    + return rows;
    + }
    + }
    +
    + abstract void skipRows(long rows) throws IOException;
    +
    + Object next(Object previous) throws IOException {
    + if (present != null) {
    + valuePresent = present.next() == 1;
    + }
    + return previous;
    + }
    + }
    +
    + private static class BooleanTreeReader extends TreeReader{
    + private BitFieldReader reader = null;
    +
    + BooleanTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + reader = new BitFieldReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA)), 1);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + BooleanWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new BooleanWritable();
    + } else {
    + result = (BooleanWritable) previous;
    + }
    + result.set(reader.next() == 1);
    + }
    + return result;
    + }
    + }
    +
    + private static class ByteTreeReader extends TreeReader{
    + private RunLengthByteReader reader = null;
    +
    + ByteTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + reader = new RunLengthByteReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA)));
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + ByteWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new ByteWritable();
    + } else {
    + result = (ByteWritable) previous;
    + }
    + result.set(reader.next());
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    + }
    +
    + private static class ShortTreeReader extends TreeReader{
    + private RunLengthIntegerReader reader = null;
    +
    + ShortTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + reader = new RunLengthIntegerReader(streams.get(name), true);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + ShortWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new ShortWritable();
    + } else {
    + result = (ShortWritable) previous;
    + }
    + result.set((short) reader.next());
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    + }
    +
    + private static class IntTreeReader extends TreeReader{
    + private RunLengthIntegerReader reader = null;
    +
    + IntTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + reader = new RunLengthIntegerReader(streams.get(name), true);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + IntWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new IntWritable();
    + } else {
    + result = (IntWritable) previous;
    + }
    + result.set((int) reader.next());
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    + }
    +
    + private static class LongTreeReader extends TreeReader{
    + private RunLengthIntegerReader reader = null;
    +
    + LongTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + reader = new RunLengthIntegerReader(streams.get(name), true);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + LongWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new LongWritable();
    + } else {
    + result = (LongWritable) previous;
    + }
    + result.set(reader.next());
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    + }
    +
    + private static class FloatTreeReader extends TreeReader{
    + private InStream stream;
    +
    + FloatTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + stream = streams.get(name);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + stream.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + FloatWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new FloatWritable();
    + } else {
    + result = (FloatWritable) previous;
    + }
    + result.set(SerializationUtils.readFloat(stream));
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + for(int i=0; i < items; ++i) {
    + SerializationUtils.readFloat(stream);
    + }
    + }
    + }
    +
    + private static class DoubleTreeReader extends TreeReader{
    + private InStream stream;
    +
    + DoubleTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name =
    + new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + stream = streams.get(name);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + stream.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + DoubleWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new DoubleWritable();
    + } else {
    + result = (DoubleWritable) previous;
    + }
    + result.set(SerializationUtils.readDouble(stream));
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + stream.skip(items * 8);
    + }
    + }
    +
    + private static class BinaryTreeReader extends TreeReader{
    + private InStream stream;
    + private RunLengthIntegerReader lengths;
    +
    + BinaryTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA);
    + stream = streams.get(name);
    + lengths = new RunLengthIntegerReader(streams.get(new
    + StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
    + false);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + stream.seek(index[columnId]);
    + lengths.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + BytesWritable result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new BytesWritable();
    + } else {
    + result = (BytesWritable) previous;
    + }
    + int len = (int) lengths.next();
    + result.setSize(len);
    + int offset = 0;
    + while (len > 0) {
    + int written = stream.read(result.getBytes(), offset, len);
    + if (written < 0) {
    + throw new EOFException("Can't finish byte read from " + stream);
    + }
    + len -= written;
    + offset += written;
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + long lengthToSkip = 0;
    + for(int i=0; i < items; ++i) {
    + lengthToSkip += lengths.next();
    + }
    + stream.skip(lengthToSkip);
    + }
    + }
    +
    + private static class TimestampTreeReader extends TreeReader{
    + private RunLengthIntegerReader data;
    + private RunLengthIntegerReader nanos;
    +
    + TimestampTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + data = new RunLengthIntegerReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA)), true);
    + nanos = new RunLengthIntegerReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.NANO_DATA)), false);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + data.seek(index[columnId]);
    + nanos.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + Timestamp result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new Timestamp(0);
    + } else {
    + result = (Timestamp) previous;
    + }
    + long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) *
    + WriterImpl.MILLIS_PER_SECOND;
    + int newNanos = parseNanos(nanos.next());
    + // fix the rounding when we divided by 1000.
    + if (millis >= 0) {
    + millis += newNanos / 1000000;
    + } else {
    + millis -= newNanos / 1000000;
    + }
    + result.setTime(millis);
    + result.setNanos(newNanos);
    + }
    + return result;
    + }
    +
    + private static int parseNanos(long serialized) {
    + int zeros = 7 & (int) serialized;
    + int result = (int) serialized >>> 3;
    + if (zeros != 0) {
    + for(int i =0; i <= zeros; ++i) {
    + result *= 10;
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + data.skip(items);
    + nanos.skip(items);
    + }
    + }
    +
    + private static class StringTreeReader extends TreeReader {
    + private DynamicByteArray dictionaryBuffer = null;
    + private int dictionarySize;
    + private int[] dictionaryOffsets;
    + private RunLengthIntegerReader reader;
    +
    + StringTreeReader(int columnId) {
    + super(columnId);
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    +
    + // read the dictionary blob
    + dictionarySize = encodings.get(columnId).getDictionarySize();
    + StreamName name = new StreamName(columnId,
    + OrcProto.Stream.Kind.DICTIONARY_DATA);
    + InStream in = streams.get(name);
    + if (in.available() > 0) {
    + dictionaryBuffer = new DynamicByteArray(64, in.available());
    + dictionaryBuffer.readAll(in);
    + } else {
    + dictionaryBuffer = null;
    + }
    + in.close();
    +
    + // read the lengths
    + name = new StreamName(columnId, OrcProto.Stream.Kind.LENGTH);
    + in = streams.get(name);
    + RunLengthIntegerReader lenReader = new RunLengthIntegerReader(in, false);
    + int offset = 0;
    + if (dictionaryOffsets == null ||
    + dictionaryOffsets.length < dictionarySize + 1) {
    + dictionaryOffsets = new int[dictionarySize + 1];
    + }
    + for(int i=0; i < dictionarySize; ++i) {
    + dictionaryOffsets[i] = offset;
    + offset += (int) lenReader.next();
    + }
    + dictionaryOffsets[dictionarySize] = offset;
    + in.close();
    +
    + // set up the row reader
    + name = new StreamName(columnId, OrcProto.Stream.Kind.DATA);
    + reader = new RunLengthIntegerReader(streams.get(name), false);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + reader.seek(index[columnId]);
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + Text result = null;
    + if (valuePresent) {
    + int entry = (int) reader.next();
    + if (previous == null) {
    + result = new Text();
    + } else {
    + result = (Text) previous;
    + }
    + int offset = dictionaryOffsets[entry];
    + int length;
    + // if it isn't the last entry, subtract the offsets; otherwise use
    + // the buffer length.
    + if (entry < dictionaryOffsets.length - 1) {
    + length = dictionaryOffsets[entry + 1] - offset;
    + } else {
    + length = dictionaryBuffer.size() - offset;
    + }
    + dictionaryBuffer.setText(result, offset, length);
    + }
    + return result;
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + reader.skip(countNonNulls(items));
    + }
    + }
    +
    + private static class StructTreeReader extends TreeReader {
    + private final TreeReader[] fields;
    + private final String[] fieldNames;
    +
    + StructTreeReader(int columnId,
    + List<OrcProto.Type> types,
    + boolean[] included) throws IOException {
    + super(columnId);
    + OrcProto.Type type = types.get(columnId);
    + int fieldCount = type.getFieldNamesCount();
    + this.fields = new TreeReader[fieldCount];
    + this.fieldNames = new String[fieldCount];
    + for(int i=0; i < fieldCount; ++i) {
    + int subtype = type.getSubtypes(i);
    + if (included == null || included[subtype]) {
    + this.fields[i] = createTreeReader(subtype, types, included);
    + }
    + this.fieldNames[i] = type.getFieldNames(i);
    + }
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + for(TreeReader kid: fields) {
    + kid.seek(index);
    + }
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + OrcStruct result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new OrcStruct(fields.length);
    + } else {
    + result = (OrcStruct) previous;
    + }
    + for(int i=0; i < fields.length; ++i) {
    + if (fields[i] != null) {
    + result.setFieldValue(i, fields[i].next(result.getFieldValue(i)));
    + }
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + for(TreeReader field: fields) {
    + if (field != null) {
    + field.startStripe(streams, encodings);
    + }
    + }
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + for(TreeReader field: fields) {
    + field.skipRows(items);
    + }
    + }
    + }
    +
    + private static class UnionTreeReader extends TreeReader {
    + private final TreeReader[] fields;
    + private RunLengthByteReader tags;
    +
    + UnionTreeReader(int columnId,
    + List<OrcProto.Type> types,
    + boolean[] included) throws IOException {
    + super(columnId);
    + OrcProto.Type type = types.get(columnId);
    + int fieldCount = type.getSubtypesCount();
    + this.fields = new TreeReader[fieldCount];
    + for(int i=0; i < fieldCount; ++i) {
    + int subtype = type.getSubtypes(i);
    + if (included == null || included[subtype]) {
    + this.fields[i] = createTreeReader(subtype, types, included);
    + }
    + }
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + tags.seek(index[columnId]);
    + for(TreeReader kid: fields) {
    + kid.seek(index);
    + }
    + }
    +
    + @Override
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + OrcUnion result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new OrcUnion();
    + } else {
    + result = (OrcUnion) previous;
    + }
    + byte tag = tags.next();
    + Object previousVal = result.getObject();
    + result.set(tag, fields[tag].next(tag == result.getTag() ?
    + previousVal : null));
    + }
    + return result;
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + tags = new RunLengthByteReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.DATA)));
    + for(TreeReader field: fields) {
    + if (field != null) {
    + field.startStripe(streams, encodings);
    + }
    + }
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + long[] counts = new long[fields.length];
    + for(int i=0; i < items; ++i) {
    + counts[tags.next()] += 1;
    + }
    + for(int i=0; i < counts.length; ++i) {
    + fields[i].skipRows(counts[i]);
    + }
    + }
    + }
    +
    + private static class ListTreeReader extends TreeReader {
    + private final TreeReader elementReader;
    + private RunLengthIntegerReader lengths;
    +
    + ListTreeReader(int columnId,
    + List<OrcProto.Type> types,
    + boolean[] included) throws IOException {
    + super(columnId);
    + OrcProto.Type type = types.get(columnId);
    + elementReader = createTreeReader(type.getSubtypes(0), types, included);
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + lengths.seek(index[columnId]);
    + elementReader.seek(index);
    + }
    +
    + @Override
    + @SuppressWarnings("unchecked")
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + List<Object> result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new ArrayList<Object>();
    + } else {
    + result = (ArrayList<Object>) previous;
    + }
    + int prevLength = result.size();
    + int length = (int) lengths.next();
    + // extend the list to the new length
    + for(int i=prevLength; i < length; ++i) {
    + result.add(null);
    + }
    + // read the new elements into the array
    + for(int i=0; i< length; i++) {
    + result.set(i, elementReader.next(i < prevLength ?
    + result.get(i) : null));
    + }
    + // remove any extra elements
    + for(int i=prevLength - 1; i >= length; --i) {
    + result.remove(i);
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + lengths = new RunLengthIntegerReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.LENGTH)), false);
    + if (elementReader != null) {
    + elementReader.startStripe(streams, encodings);
    + }
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + long childSkip = 0;
    + for(long i=0; i < items; ++i) {
    + childSkip += lengths.next();
    + }
    + elementReader.skipRows(childSkip);
    + }
    + }
    +
    + private static class MapTreeReader extends TreeReader {
    + private final TreeReader keyReader;
    + private final TreeReader valueReader;
    + private RunLengthIntegerReader lengths;
    +
    + MapTreeReader(int columnId,
    + List<OrcProto.Type> types,
    + boolean[] included) throws IOException {
    + super(columnId);
    + OrcProto.Type type = types.get(columnId);
    + int keyColumn = type.getSubtypes(0);
    + int valueColumn = type.getSubtypes(1);
    + if (included == null || included[keyColumn]) {
    + keyReader = createTreeReader(keyColumn, types, included);
    + } else {
    + keyReader = null;
    + }
    + if (included == null || included[valueColumn]) {
    + valueReader = createTreeReader(valueColumn, types, included);
    + } else {
    + valueReader = null;
    + }
    + }
    +
    + @Override
    + void seek(PositionProvider[] index) throws IOException {
    + super.seek(index);
    + lengths.seek(index[columnId]);
    + keyReader.seek(index);
    + valueReader.seek(index);
    + }
    +
    + @Override
    + @SuppressWarnings("unchecked")
    + Object next(Object previous) throws IOException {
    + super.next(previous);
    + Map<Object, Object> result = null;
    + if (valuePresent) {
    + if (previous == null) {
    + result = new HashMap<Object, Object>();
    + } else {
    + result = (HashMap<Object, Object>) previous;
    + }
    + // for now just clear and create new objects
    + result.clear();
    + int length = (int) lengths.next();
    + // read the new elements into the array
    + for(int i=0; i< length; i++) {
    + result.put(keyReader.next(null), valueReader.next(null));
    + }
    + }
    + return result;
    + }
    +
    + @Override
    + void startStripe(Map<StreamName, InStream> streams,
    + List<OrcProto.ColumnEncoding> encodings
    + ) throws IOException {
    + super.startStripe(streams, encodings);
    + lengths = new RunLengthIntegerReader(streams.get(new StreamName(columnId,
    + OrcProto.Stream.Kind.LENGTH)), false);
    + if (keyReader != null) {
    + keyReader.startStripe(streams, encodings);
    + }
    + if (valueReader != null) {
    + valueReader.startStripe(streams, encodings);
    + }
    + }
    +
    + @Override
    + void skipRows(long items) throws IOException {
    + items = countNonNulls(items);
    + long childSkip = 0;
    + for(long i=0; i < items; ++i) {
    + childSkip += lengths.next();
    + }
    + keyReader.skipRows(childSkip);
    + valueReader.skipRows(childSkip);
    + }
    + }
    +
    + private static TreeReader createTreeReader(int columnId,
    + List<OrcProto.Type> types,
    + boolean[] included
    + ) throws IOException {
    + OrcProto.Type type = types.get(columnId);
    + switch (type.getKind()) {
    + case BOOLEAN:
    + return new BooleanTreeReader(columnId);
    + case BYTE:
    + return new ByteTreeReader(columnId);
    + case DOUBLE:
    + return new DoubleTreeReader(columnId);
    + case FLOAT:
    + return new FloatTreeReader(columnId);
    + case SHORT:
    + return new ShortTreeReader(columnId);
    + case INT:
    + return new IntTreeReader(columnId);
    + case LONG:
    + return new LongTreeReader(columnId);
    + case STRING:
    + return new StringTreeReader(columnId);
    + case BINARY:
    + return new BinaryTreeReader(columnId);
    + case TIMESTAMP:
    + return new TimestampTreeReader(columnId);
    + case STRUCT:
    + return new StructTreeReader(columnId, types, included);
    + case LIST:
    + return new ListTreeReader(columnId, types, included);
    + case MAP:
    + return new MapTreeReader(columnId, types, included);
    + case UNION:
    + return new UnionTreeReader(columnId, types, included);
    + default:
    + throw new IllegalArgumentException("Unsupported type " +
    + type.getKind());
    + }
    + }
    +
    + OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
    + ) throws IOException {
    + long offset = stripe.getOffset() + stripe.getIndexLength() +
    + stripe.getDataLength();
    + int tailLength = (int) stripe.getFooterLength();
    +
    + // read the footer
    + ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
    + file.seek(offset);
    + file.readFully(tailBuf.array(), tailBuf.arrayOffset(), tailLength);
    + return OrcProto.StripeFooter.parseFrom(InStream.create("footer", tailBuf,
    + codec, bufferSize));
    + }
    +
    + private void readStripe() throws IOException {
    + StripeInformation stripe = stripes.get(currentStripe);
    + stripeFooter = readStripeFooter(stripe);
    + long offset = stripe.getOffset();
    + streams.clear();
    +
    + // if we aren't projecting columns, just read the whole stripe
    + if (included == null) {
    + byte[] buffer =
    + new byte[(int) (stripe.getDataLength())];
    + file.seek(offset + stripe.getIndexLength());
    + file.readFully(buffer, 0, buffer.length);
    + int sectionOffset = 0;
    + for(OrcProto.Stream section: stripeFooter.getStreamsList()) {
    + if (StreamName.getArea(section.getKind()) == StreamName.Area.DATA) {
    + int sectionLength = (int) section.getLength();
    + ByteBuffer sectionBuffer = ByteBuffer.wrap(buffer, sectionOffset,
    + sectionLength);
    + StreamName name = new StreamName(section.getColumn(),
    + section.getKind());
    + streams.put(name,
    + InStream.create(name.toString(), sectionBuffer, codec,
    + bufferSize));
    + sectionOffset += sectionLength;
    + }
    + }
    + } else {
    + List<OrcProto.Stream> streamList = stripeFooter.getStreamsList();
    + // the index of the current section
    + int currentSection = 0;
    + while (currentSection < streamList.size() &&
    + StreamName.getArea(streamList.get(currentSection).getKind()) !=
    + StreamName.Area.DATA) {
    + currentSection += 1;
    + }
    + // byte position of the current section relative to the stripe start
    + long sectionOffset = stripe.getIndexLength();
    + while (currentSection < streamList.size()) {
    + int bytes = 0;
    +
    + // find the first section that shouldn't be read
    + int excluded=currentSection;
    + while (excluded < streamList.size() &&
    + included[streamList.get(excluded).getColumn()]) {
    + bytes += streamList.get(excluded).getLength();
    + excluded += 1;
    + }
    +
    + // actually read the bytes as a big chunk
    + if (bytes != 0) {
    + byte[] buffer = new byte[bytes];
    + file.seek(offset + sectionOffset);
    + file.readFully(buffer, 0, bytes);
    + sectionOffset += bytes;
    +
    + // create the streams for the sections we just read
    + bytes = 0;
    + while (currentSection < excluded) {
    + OrcProto.Stream section = streamList.get(currentSection);
    + StreamName name =
    + new StreamName(section.getColumn(), section.getKind());
    + this.streams.put(name,
    + InStream.create(name.toString(),
    + ByteBuffer.wrap(buffer, bytes,
    + (int) section.getLength()), codec, bufferSize));
    + currentSection += 1;
    + bytes += section.getLength();
    + }
    + }
    +
    + // skip forward until we get back to a section that we need
    + while (currentSection < streamList.size() &&
    + !included[streamList.get(currentSection).getColumn()]) {
    + sectionOffset += streamList.get(currentSection).getLength();
    + currentSection += 1;
    + }
    + }
    + }
    + reader.startStripe(streams, stripeFooter.getColumnsList());
    + rowInStripe = 0;
    + rowCountInStripe = stripe.getNumberOfRows();
    + rowBaseInStripe = 0;
    + for(int i=0; i < currentStripe; ++i) {
    + rowBaseInStripe += stripes.get(i).getNumberOfRows();
    + }
    + for(int i=0; i < indexes.length; ++i) {
    + indexes[i] = null;
    + }
    + }
    +
    + @Override
    + public boolean hasNext() throws IOException {
    + return rowInStripe < rowCountInStripe || currentStripe < stripes.size() - 1;
    + }
    +
    + @Override
    + public Object next(Object previous) throws IOException {
    + if (rowInStripe >= rowCountInStripe) {
    + currentStripe += 1;
    + readStripe();
    + }
    + rowInStripe += 1;
    + return reader.next(previous);
    + }
    +
    + @Override
    + public void close() throws IOException {
    + file.close();
    + }
    +
    + @Override
    + public long getRowNumber() {
    + return rowInStripe + rowBaseInStripe + firstRow;
    + }
    +
    + /**
    + * Return the fraction of rows that have been read from the selected
    + * section of the file.
    + * @return fraction between 0.0 and 1.0 of rows consumed
    + */
    + @Override
    + public float getProgress() {
    + return ((float) rowBaseInStripe + rowInStripe) / totalRowCount;
    + }
    +
    + private int findStripe(long rowNumber) {
    + if (rowNumber < 0) {
    + throw new IllegalArgumentException("Seek to a negative row number " +
    + rowNumber);
    + } else if (rowNumber < firstRow) {
    + throw new IllegalArgumentException("Seek before reader range " +
    + rowNumber);
    + }
    + rowNumber -= firstRow;
    + for(int i=0; i < stripes.size(); i++) {
    + StripeInformation stripe = stripes.get(i);
    + if (stripe.getNumberOfRows() > rowNumber) {
    + return i;
    + }
    + rowNumber -= stripe.getNumberOfRows();
    + }
    + throw new IllegalArgumentException("Seek after the end of reader range");
    + }
    +
    + private void readRowIndex() throws IOException {
    + long offset = stripes.get(currentStripe).getOffset();
    + for(OrcProto.Stream stream: stripeFooter.getStreamsList()) {
    + if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) {
    + int col = stream.getColumn();
    + if ((included == null || included[col]) && indexes[col] == null) {
    + byte[] buffer = new byte[(int) stream.getLength()];
    + file.seek(offset);
    + file.readFully(buffer);
    + indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create("index",
    + ByteBuffer.wrap(buffer), codec, bufferSize));
    + }
    + }
    + offset += stream.getLength();
    + }
    + }
    +
    + private void seekToRowEntry(int rowEntry) throws IOException {
    + PositionProvider[] index = new PositionProvider[indexes.length];
    + for(int i=0; i < indexes.length; ++i) {
    + if (indexes[i] != null) {
    + index[i]=
    + new PositionProviderImpl(indexes[i].getEntry(rowEntry));
    + }
    + }
    + reader.seek(index);
    + }
    +
    + @Override
    + public void seekToRow(long rowNumber) throws IOException {
    + int rightStripe = findStripe(rowNumber);
    + if (rightStripe != currentStripe) {
    + currentStripe = rightStripe;
    + readStripe();
    + }
    + readRowIndex();
    + rowInStripe = rowNumber - rowBaseInStripe;
    + if (rowIndexStride != 0) {
    + long entry = rowInStripe / rowIndexStride;
    + seekToRowEntry((int) entry);
    + reader.skipRows(rowInStripe - entry * rowIndexStride);
    + } else {
    + reader.skipRows(rowInStripe);
    + }
    + }
    +}
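
    A note on the timestamp encoding that parseNanos() above reverses: the low three bits of each serialized nanosecond value record how many trailing decimal zeros were stripped (a field of z > 0 means z + 1 zeros were removed), and the remaining digits sit above bit 3. The sketch below is not part of the patch; it pairs the parseNanos() arithmetic shown above with a hypothetical encoder that is merely consistent with it (WriterImpl, which appears elsewhere in this commit, may arrange the write side differently), and the class and method names are mine.

      public class NanosEncodingSketch {

        // hypothetical encoder consistent with parseNanos(): strip trailing decimal
        // zeros (only when at least two can be removed) and keep (removed - 1) in
        // the low three bits
        static long encodeNanos(int nanos) {
          if (nanos == 0) {
            return 0;
          }
          int removed = 0;
          while (nanos % 10 == 0 && removed < 8) {
            nanos /= 10;
            removed += 1;
          }
          if (removed < 2) {
            while (removed > 0) {        // a single zero cannot be represented; keep it
              nanos *= 10;
              removed -= 1;
            }
            return ((long) nanos) << 3;
          }
          return (((long) nanos) << 3) | (removed - 1);
        }

        // same arithmetic as TimestampTreeReader.parseNanos() above
        static int parseNanos(long serialized) {
          int zeros = 7 & (int) serialized;
          int result = ((int) serialized) >>> 3;
          if (zeros != 0) {
            for (int i = 0; i <= zeros; ++i) {
              result *= 10;
            }
          }
          return result;
        }

        public static void main(String[] args) {
          long serialized = encodeNanos(123000000);   // six trailing zeros
          System.out.println(serialized);             // 989  (123 << 3 | 5)
          System.out.println(parseNanos(serialized)); // 123000000
        }
      }

    Running it prints 989 and then 123000000, i.e. 123,000,000 ns round-trips through ten bits of digits plus the three-bit zero count.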
    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,88 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.EOFException;
    +import java.io.IOException;
    +
    +class BitFieldReader {
    + private RunLengthByteReader input;
    + private final int bitSize;
    + private int current;
    + private int bitsLeft;
    + private final int mask;
    +
    + BitFieldReader(InStream input,
    + int bitSize) throws IOException {
    + this.input = new RunLengthByteReader(input);
    + this.bitSize = bitSize;
    + mask = (1 << bitSize) - 1;
    + }
    +
    + private void readByte() throws IOException {
    + if (input.hasNext()) {
    + current = 0xff & input.next();
    + bitsLeft = 8;
    + } else {
    + throw new EOFException("Read past end of bit field from " + input);
    + }
    + }
    +
    + int next() throws IOException {
    + int result = 0;
    + int bitsLeftToRead = bitSize;
    + while (bitsLeftToRead > bitsLeft) {
    + result <<= bitsLeft;
    + result |= current & ((1 << bitsLeft) - 1);
    + bitsLeftToRead -= bitsLeft;
    + readByte();
    + }
    + if (bitsLeftToRead > 0) {
    + result <<= bitsLeftToRead;
    + bitsLeft -= bitsLeftToRead;
    + result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
    + }
    + return result & mask;
    + }
    +
    + void seek(PositionProvider index) throws IOException {
    + input.seek(index);
    + int consumed = (int) index.getNext();
    + if (consumed > 8) {
    + throw new IllegalArgumentException("Seek past end of byte at " +
    + consumed + " in " + input);
    + } else if (consumed != 0) {
    + readByte();
    + bitsLeft = 8 - consumed;
    + } else {
    + bitsLeft = 0;
    + }
    + }
    +
    + void skip(long items) throws IOException {
    + long totalBits = bitSize * items;
    + if (bitsLeft >= totalBits) {
    + bitsLeft -= totalBits;
    + } else {
    + totalBits -= bitsLeft;
    + input.skip(totalBits / 8);
    + current = input.next();
    + bitsLeft = (int) (8 - (totalBits % 8));
    + }
    + }
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldWriter.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,69 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +
    +class BitFieldWriter {
    + private RunLengthByteWriter output;
    + private final int bitSize;
    + private byte current = 0;
    + private int bitsLeft = 8;
    +
    + BitFieldWriter(PositionedOutputStream output,
    + int bitSize) throws IOException {
    + this.output = new RunLengthByteWriter(output);
    + this.bitSize = bitSize;
    + }
    +
    + private void writeByte() throws IOException {
    + output.write(current);
    + current = 0;
    + bitsLeft = 8;
    + }
    +
    + void flush() throws IOException {
    + if (bitsLeft != 8) {
    + writeByte();
    + }
    + output.flush();
    + }
    +
    + void write(int value) throws IOException {
    + int bitsToWrite = bitSize;
    + while (bitsToWrite > bitsLeft) {
    + // add the bits to the bottom of the current word
    + current |= value >>> (bitsToWrite - bitsLeft);
    + // subtract out the bits we just added
    + bitsToWrite -= bitsLeft;
    + // zero out the bits above bitsToWrite
    + value &= (1 << bitsToWrite) - 1;
    + writeByte();
    + }
    + bitsLeft -= bitsToWrite;
    + current |= value << bitsLeft;
    + if (bitsLeft == 0) {
    + writeByte();
    + }
    + }
    +
    + void getPosition(PositionRecorder recorder) throws IOException {
    + output.getPosition(recorder);
    + recorder.addPosition(8 - bitsLeft);
    + }
    +}
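
    BitFieldReader and BitFieldWriter above pack fixed-width values most-significant-bit first into bytes, which are then run-length encoded by RunLengthByteWriter/RunLengthByteReader. The standalone sketch below is not part of the patch: it repeats the same bit arithmetic over a plain list of bytes so the packing can be checked in isolation, dropping the run-length layer; the pack/unpack names are mine.

      import java.util.ArrayList;
      import java.util.List;

      public class BitPackingSketch {

        // mirrors BitFieldWriter.write()/flush(), but appends bytes to a plain list
        static List<Byte> pack(int[] values, int bitSize) {
          List<Byte> out = new ArrayList<Byte>();
          int current = 0;
          int bitsLeft = 8;
          for (int value : values) {
            int bitsToWrite = bitSize;
            while (bitsToWrite > bitsLeft) {
              // add the top bits of the value to the bottom of the current byte
              current |= value >>> (bitsToWrite - bitsLeft);
              bitsToWrite -= bitsLeft;
              value &= (1 << bitsToWrite) - 1;
              out.add((byte) current);
              current = 0;
              bitsLeft = 8;
            }
            bitsLeft -= bitsToWrite;
            current |= value << bitsLeft;
            if (bitsLeft == 0) {
              out.add((byte) current);
              current = 0;
              bitsLeft = 8;
            }
          }
          if (bitsLeft != 8) {
            out.add((byte) current);   // flush the partial byte, zero-padded
          }
          return out;
        }

        // mirrors BitFieldReader.next(), reading 'count' values back out
        static int[] unpack(List<Byte> bytes, int bitSize, int count) {
          int[] result = new int[count];
          int mask = (1 << bitSize) - 1;
          int pos = 0;
          int current = 0;
          int bitsLeft = 0;
          for (int i = 0; i < count; ++i) {
            int value = 0;
            int bitsLeftToRead = bitSize;
            while (bitsLeftToRead > bitsLeft) {
              value <<= bitsLeft;
              value |= current & ((1 << bitsLeft) - 1);
              bitsLeftToRead -= bitsLeft;
              current = bytes.get(pos++) & 0xff;
              bitsLeft = 8;
            }
            if (bitsLeftToRead > 0) {
              value <<= bitsLeftToRead;
              bitsLeft -= bitsLeftToRead;
              value |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
            }
            result[i] = value & mask;
          }
          return result;
        }

        public static void main(String[] args) {
          List<Byte> packed = pack(new int[]{5, 1, 7}, 3);
          for (Byte b : packed) {
            System.out.printf("%02x ", b & 0xff);   // a7 80
          }
          System.out.println();
          for (int v : unpack(packed, 3, 3)) {
            System.out.print(v + " ");              // 5 1 7
          }
          System.out.println();
        }
      }

    Packing the 3-bit values 5, 1, 7 produces the bytes a7 80 (10100111, then 10000000 with the tail padded with zero bits, as flush() does), and unpacking returns 5, 1, 7.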

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BooleanColumnStatistics.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,27 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Statistics for boolean columns.
    + */
    +public interface BooleanColumnStatistics extends ColumnStatistics {
    + long getFalseCount();
    +
    + long getTrueCount();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatistics.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,30 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Statistics that are available for all types of columns.
    + */
    +public interface ColumnStatistics {
    + /**
    + * Get the number of values in this column. It will differ from the number
    + * of rows because of NULL values and repeated values.
    + * @return the number of values
    + */
    + long getNumberOfValues();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,516 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    +
    +class ColumnStatisticsImpl implements ColumnStatistics {
    +
    + private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl
    + implements BooleanColumnStatistics {
    + private long trueCount = 0;
    +
    + BooleanStatisticsImpl(OrcProto.ColumnStatistics stats) {
    + super(stats);
    + OrcProto.BucketStatistics bkt = stats.getBucketStatistics();
    + trueCount = bkt.getCount(0);
    + }
    +
    + BooleanStatisticsImpl() {
    + }
    +
    + @Override
    + void reset() {
    + super.reset();
    + trueCount = 0;
    + }
    +
    + @Override
    + void updateBoolean(boolean value) {
    + if (value) {
    + trueCount += 1;
    + }
    + }
    +
    + @Override
    + void merge(ColumnStatisticsImpl other) {
    + super.merge(other);
    + BooleanStatisticsImpl bkt = (BooleanStatisticsImpl) other;
    + trueCount += bkt.trueCount;
    + }
    +
    + @Override
    + OrcProto.ColumnStatistics.Builder serialize() {
    + OrcProto.ColumnStatistics.Builder builder = super.serialize();
    + OrcProto.BucketStatistics.Builder bucket =
    + OrcProto.BucketStatistics.newBuilder();
    + bucket.addCount(trueCount);
    + builder.setBucketStatistics(bucket);
    + return builder;
    + }
    +
    + @Override
    + public long getFalseCount() {
    + return getNumberOfValues() - trueCount;
    + }
    +
    + @Override
    + public long getTrueCount() {
    + return trueCount;
    + }
    +
    + @Override
    + public String toString() {
    + return super.toString() + " true: " + trueCount;
    + }
    + }
    +
    + private static final class IntegerStatisticsImpl extends ColumnStatisticsImpl
    + implements IntegerColumnStatistics {
    +
    + private long minimum = Long.MAX_VALUE;
    + private long maximum = Long.MIN_VALUE;
    + private long sum = 0;
    + private boolean hasMinimum = false;
    + private boolean overflow = false;
    +
    + IntegerStatisticsImpl() {
    + }
    +
    + IntegerStatisticsImpl(OrcProto.ColumnStatistics stats) {
    + super(stats);
    + OrcProto.IntegerStatistics intStat = stats.getIntStatistics();
    + if (intStat.hasMinimum()) {
    + hasMinimum = true;
    + minimum = intStat.getMinimum();
    + }
    + if (intStat.hasMaximum()) {
    + maximum = intStat.getMaximum();
    + }
    + if (intStat.hasSum()) {
    + sum = intStat.getSum();
    + } else {
    + overflow = true;
    + }
    + }
    +
    + @Override
    + void reset() {
    + super.reset();
    + hasMinimum = false;
    + minimum = Long.MAX_VALUE;
    + maximum = Long.MIN_VALUE;
    + sum = 0;
    + overflow = false;
    + }
    +
    + @Override
    + void updateInteger(long value) {
    + if (!hasMinimum) {
    + hasMinimum = true;
    + minimum = value;
    + maximum = value;
    + } else if (value < minimum) {
    + minimum = value;
    + } else if (value > maximum) {
    + maximum = value;
    + }
    + if (!overflow) {
    + boolean wasPositive = sum >= 0;
    + sum += value;
    + if ((value >= 0) == wasPositive) {
    + overflow = (sum >= 0) != wasPositive;
    + }
    + }
    + }
    +
    + @Override
    + void merge(ColumnStatisticsImpl other) {
    + IntegerStatisticsImpl otherInt = (IntegerStatisticsImpl) other;
    + if (!hasMinimum) {
    + hasMinimum = otherInt.hasMinimum;
    + minimum = otherInt.minimum;
    + maximum = otherInt.maximum;
    + } else if (otherInt.hasMinimum) {
    + if (otherInt.minimum < minimum) {
    + minimum = otherInt.minimum;
    + }
    + if (otherInt.maximum > maximum) {
    + maximum = otherInt.maximum;
    + }
    + }
    + super.merge(other);
    + overflow |= otherInt.overflow;
    + if (!overflow) {
    + boolean wasPositive = sum >= 0;
    + sum += otherInt.sum;
    + if ((otherInt.sum >= 0) == wasPositive) {
    + overflow = (sum >= 0) != wasPositive;
    + }
    + }
    + }
    +
    + @Override
    + OrcProto.ColumnStatistics.Builder serialize() {
    + OrcProto.ColumnStatistics.Builder builder = super.serialize();
    + OrcProto.IntegerStatistics.Builder intb =
    + OrcProto.IntegerStatistics.newBuilder();
    + if (hasMinimum) {
    + intb.setMinimum(minimum);
    + intb.setMaximum(maximum);
    + }
    + if (!overflow) {
    + intb.setSum(sum);
    + }
    + builder.setIntStatistics(intb);
    + return builder;
    + }
    +
    + @Override
    + public long getMinimum() {
    + return minimum;
    + }
    +
    + @Override
    + public long getMaximum() {
    + return maximum;
    + }
    +
    + @Override
    + public boolean isSumDefined() {
    + return !overflow;
    + }
    +
    + @Override
    + public long getSum() {
    + return sum;
    + }
    +
    + @Override
    + public String toString() {
    + StringBuilder buf = new StringBuilder(super.toString());
    + if (hasMinimum) {
    + buf.append(" min: ");
    + buf.append(minimum);
    + buf.append(" max: ");
    + buf.append(maximum);
    + }
    + if (!overflow) {
    + buf.append(" sum: ");
    + buf.append(sum);
    + }
    + return buf.toString();
    + }
    + }
    +
    + private static final class DoubleStatisticsImpl extends ColumnStatisticsImpl
    + implements DoubleColumnStatistics {
    + private boolean hasMinimum = false;
    + private double minimum = Double.MAX_VALUE;
    + private double maximum = Double.MIN_VALUE;
    + private double sum = 0;
    +
    + DoubleStatisticsImpl() {
    + }
    +
    + DoubleStatisticsImpl(OrcProto.ColumnStatistics stats) {
    + super(stats);
    + OrcProto.DoubleStatistics dbl = stats.getDoubleStatistics();
    + if (dbl.hasMinimum()) {
    + hasMinimum = true;
    + minimum = dbl.getMinimum();
    + }
    + if (dbl.hasMaximum()) {
    + maximum = dbl.getMaximum();
    + }
    + if (dbl.hasSum()) {
    + sum = dbl.getSum();
    + }
    + }
    +
    + @Override
    + void reset() {
    + super.reset();
    + hasMinimum = false;
    + minimum = Double.MAX_VALUE;
    + maximum = Double.MIN_VALUE;
    + sum = 0;
    + }
    +
    + @Override
    + void updateDouble(double value) {
    + if (!hasMinimum) {
    + hasMinimum = true;
    + minimum = value;
    + maximum = value;
    + } else if (value < minimum) {
    + minimum = value;
    + } else if (value > maximum) {
    + maximum = value;
    + }
    + sum += value;
    + }
    +
    + @Override
    + void merge(ColumnStatisticsImpl other) {
    + super.merge(other);
    + DoubleStatisticsImpl dbl = (DoubleStatisticsImpl) other;
    + if (!hasMinimum) {
    + hasMinimum = dbl.hasMinimum;
    + minimum = dbl.minimum;
    + maximum = dbl.maximum;
    + } else if (dbl.hasMinimum) {
    + if (dbl.minimum < minimum) {
    + minimum = dbl.minimum;
    + }
    + if (dbl.maximum > maximum) {
    + maximum = dbl.maximum;
    + }
    + }
    + sum += dbl.sum;
    + }
    +
    + @Override
    + OrcProto.ColumnStatistics.Builder serialize() {
    + OrcProto.ColumnStatistics.Builder builder = super.serialize();
    + OrcProto.DoubleStatistics.Builder dbl =
    + OrcProto.DoubleStatistics.newBuilder();
    + if (hasMinimum) {
    + dbl.setMinimum(minimum);
    + dbl.setMaximum(maximum);
    + }
    + dbl.setSum(sum);
    + builder.setDoubleStatistics(dbl);
    + return builder;
    + }
    +
    + @Override
    + public double getMinimum() {
    + return minimum;
    + }
    +
    + @Override
    + public double getMaximum() {
    + return maximum;
    + }
    +
    + @Override
    + public double getSum() {
    + return sum;
    + }
    +
    + @Override
    + public String toString() {
    + StringBuilder buf = new StringBuilder(super.toString());
    + if (hasMinimum) {
    + buf.append(" min: ");
    + buf.append(minimum);
    + buf.append(" max: ");
    + buf.append(maximum);
    + }
    + buf.append(" sum: ");
    + buf.append(sum);
    + return buf.toString();
    + }
    + }
    +
    + private static final class StringStatisticsImpl extends ColumnStatisticsImpl
    + implements StringColumnStatistics {
    + private String minimum = null;
    + private String maximum = null;
    +
    + StringStatisticsImpl() {
    + }
    +
    + StringStatisticsImpl(OrcProto.ColumnStatistics stats) {
    + super(stats);
    + OrcProto.StringStatistics str = stats.getStringStatistics();
    + if (str.hasMaximum()) {
    + maximum = str.getMaximum();
    + }
    + if (str.hasMinimum()) {
    + minimum = str.getMinimum();
    + }
    + }
    +
    + @Override
    + void reset() {
    + super.reset();
    + minimum = null;
    + maximum = null;
    + }
    +
    + @Override
    + void updateString(String value) {
    + if (minimum == null) {
    + minimum = value;
    + maximum = value;
    + } else if (minimum.compareTo(value) > 0) {
    + minimum = value;
    + } else if (maximum.compareTo(value) < 0) {
    + maximum = value;
    + }
    + }
    +
    + @Override
    + void merge(ColumnStatisticsImpl other) {
    + super.merge(other);
    + StringStatisticsImpl str = (StringStatisticsImpl) other;
    + if (minimum == null) {
    + minimum = str.minimum;
    + maximum = str.maximum;
    + } else if (str.minimum != null) {
    + if (minimum.compareTo(str.minimum) > 0) {
    + minimum = str.minimum;
    + } else if (maximum.compareTo(str.maximum) < 0) {
    + maximum = str.maximum;
    + }
    + }
    + }
    +
    + @Override
    + OrcProto.ColumnStatistics.Builder serialize() {
    + OrcProto.ColumnStatistics.Builder result = super.serialize();
    + OrcProto.StringStatistics.Builder str =
    + OrcProto.StringStatistics.newBuilder();
    + if (getNumberOfValues() != 0) {
    + str.setMinimum(minimum);
    + str.setMaximum(maximum);
    + }
    + result.setStringStatistics(str);
    + return result;
    + }
    +
    + @Override
    + public String getMinimum() {
    + return minimum;
    + }
    +
    + @Override
    + public String getMaximum() {
    + return maximum;
    + }
    +
    + @Override
    + public String toString() {
    + StringBuilder buf = new StringBuilder(super.toString());
    + if (getNumberOfValues() != 0) {
    + buf.append(" min: ");
    + buf.append(minimum);
    + buf.append(" max: ");
    + buf.append(maximum);
    + }
    + return buf.toString();
    + }
    + }
    +
    + private long count = 0;
    +
    + ColumnStatisticsImpl(OrcProto.ColumnStatistics stats) {
    + if (stats.hasNumberOfValues()) {
    + count = stats.getNumberOfValues();
    + }
    + }
    +
    + ColumnStatisticsImpl() {
    + }
    +
    + void increment() {
    + count += 1;
    + }
    +
    + void updateBoolean(boolean value) {
    + throw new UnsupportedOperationException("Can't update boolean");
    + }
    +
    + void updateInteger(long value) {
    + throw new UnsupportedOperationException("Can't update integer");
    + }
    +
    + void updateDouble(double value) {
    + throw new UnsupportedOperationException("Can't update double");
    + }
    +
    + void updateString(String value) {
    + throw new UnsupportedOperationException("Can't update string");
    + }
    +
    + void merge(ColumnStatisticsImpl stats) {
    + count += stats.count;
    + }
    +
    + void reset() {
    + count = 0;
    + }
    +
    + @Override
    + public long getNumberOfValues() {
    + return count;
    + }
    +
    + @Override
    + public String toString() {
    + return "count: " + count;
    + }
    +
    + OrcProto.ColumnStatistics.Builder serialize() {
    + OrcProto.ColumnStatistics.Builder builder =
    + OrcProto.ColumnStatistics.newBuilder();
    + builder.setNumberOfValues(count);
    + return builder;
    + }
    +
    + static ColumnStatisticsImpl create(ObjectInspector inspector) {
    + switch (inspector.getCategory()) {
    + case PRIMITIVE:
    + switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
    + case BOOLEAN:
    + return new BooleanStatisticsImpl();
    + case BYTE:
    + case SHORT:
    + case INT:
    + case LONG:
    + return new IntegerStatisticsImpl();
    + case FLOAT:
    + case DOUBLE:
    + return new DoubleStatisticsImpl();
    + case STRING:
    + return new StringStatisticsImpl();
    + default:
    + return new ColumnStatisticsImpl();
    + }
    + default:
    + return new ColumnStatisticsImpl();
    + }
    + }
    +
    + static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
    + if (stats.hasBucketStatistics()) {
    + return new BooleanStatisticsImpl(stats);
    + } else if (stats.hasIntStatistics()) {
    + return new IntegerStatisticsImpl(stats);
    + } else if (stats.hasDoubleStatistics()) {
    + return new DoubleStatisticsImpl(stats);
    + } else if (stats.hasStringStatistics()) {
    + return new StringStatisticsImpl(stats);
    + } else {
    + return new ColumnStatisticsImpl(stats);
    + }
    + }
    +}
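    On the write side, the statistics flow is: create one ColumnStatisticsImpl per column from its ObjectInspector, call increment() plus the type-specific update method for every value written, merge stripe-level statistics into file-level ones, and serialize the result into the protobuf footer. A minimal sketch of that flow, assuming an integer column whose 'inspector' is a long PrimitiveObjectInspector (the sketch would live in the orc package, since these methods are package-private):

        ColumnStatisticsImpl stripeStats = ColumnStatisticsImpl.create(inspector);
        ColumnStatisticsImpl fileStats = ColumnStatisticsImpl.create(inspector);
        for (long value : new long[]{3, 1, 7}) {
          stripeStats.increment();           // count the value
          stripeStats.updateInteger(value);  // track min/max/sum
        }
        fileStats.merge(stripeStats);        // roll stripe stats into file stats
        OrcProto.ColumnStatistics pb = stripeStats.serialize().build();
        stripeStats.reset();                 // ready for the next stripe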

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,42 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +
    +interface CompressionCodec {
    + /**
    + * Compress the in buffer to the out buffer.
    + * @param in the bytes to compress
    + * @param out the compressed bytes
    + * @param overflow put any additional bytes here
    + * @return true if the output is smaller than input
    + * @throws IOException
    + */
    + boolean compress(ByteBuffer in, ByteBuffer out, ByteBuffer overflow
    + ) throws IOException;
    +
    + /**
    + * Decompress the in buffer to the out buffer.
    + * @param in the bytes to decompress
    + * @param out the decompressed bytes
    + * @throws IOException
    + */
    + void decompress(ByteBuffer in, ByteBuffer out) throws IOException;
    +}
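    The contract is small: compress() may give up and return false when the data does not shrink, in which case the caller keeps the original bytes, and any compressed output that does not fit in 'out' spills into 'overflow'. A pass-through implementation, shown only to illustrate the contract (it is not part of this patch, and it would have to live in the orc package because the interface is package-private):

        // a codec that never shrinks anything and copies on decompress
        final class PassThroughCodec implements CompressionCodec {
          @Override
          public boolean compress(ByteBuffer in, ByteBuffer out,
                                  ByteBuffer overflow) throws IOException {
            return false;  // tell the caller to store the bytes uncompressed
          }

          @Override
          public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
            out.put(in);   // "decompression" is a straight copy
            out.flip();    // leave the buffer ready for reading
          }
        }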

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionKind.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,27 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * An enumeration that lists the generic compression algorithms that
    + * can be applied to ORC files.
    + */
    +public enum CompressionKind {
    + NONE, ZLIB, SNAPPY, LZO
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DoubleColumnStatistics.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,44 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Statistics for float and double columns.
    + */
    +public interface DoubleColumnStatistics extends ColumnStatistics {
    +
    + /**
    + * Get the smallest value in the column. Only defined if getNumberOfValues
    + * is non-zero.
    + * @return the minimum
    + */
    + double getMinimum();
    +
    + /**
    + * Get the largest value in the column. Only defined if getNumberOfValues
    + * is non-zero.
    + * @return the maximum
    + */
    + double getMaximum();
    +
    + /**
    + * Get the sum of the values in the column.
    + * @return the sum
    + */
    + double getSum();
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicByteArray.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,270 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.io.Text;
    +
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.io.OutputStream;
    +import java.nio.ByteBuffer;
    +
    +/**
    + * A class that is a growable array of bytes. Growth is managed in terms of
    + * chunks that are allocated when needed.
    + */
    +final class DynamicByteArray {
    + static final int DEFAULT_CHUNKSIZE = 32 * 1024;
    + static final int DEFAULT_NUM_CHUNKS = 128;
    +
    + private final int chunkSize; // our allocation sizes
    + private byte[][] data; // the real data
    + private int length; // max set element index +1
    + private int initializedChunks = 0; // the number of chunks created
    +
    + public DynamicByteArray() {
    + this(DEFAULT_NUM_CHUNKS, DEFAULT_CHUNKSIZE);
    + }
    +
    + public DynamicByteArray(int numChunks, int chunkSize) {
    + if (chunkSize == 0) {
    + throw new IllegalArgumentException("bad chunksize");
    + }
    + this.chunkSize = chunkSize;
    + data = new byte[numChunks][];
    + }
    +
    + /**
    + * Ensure that the given index is valid.
    + */
    + private void grow(int chunkIndex) {
    + if (chunkIndex >= initializedChunks) {
    + if (chunkIndex >= data.length) {
    + int newSize = Math.max(chunkIndex + 1, 2 * data.length);
    + byte[][] newChunk = new byte[newSize][];
    + System.arraycopy(data, 0, newChunk, 0, data.length);
    + data = newChunk;
    + }
    + for(int i=initializedChunks; i <= chunkIndex; ++i) {
    + data[i] = new byte[chunkSize];
    + }
    + initializedChunks = chunkIndex + 1;
    + }
    + }
    +
    + public byte get(int index) {
    + if (index >= length) {
    + throw new IndexOutOfBoundsException("Index " + index +
    + " is outside of 0.." +
    + (length - 1));
    + }
    + int i = index / chunkSize;
    + int j = index % chunkSize;
    + return data[i][j];
    + }
    +
    + public void set(int index, byte value) {
    + int i = index / chunkSize;
    + int j = index % chunkSize;
    + grow(i);
    + if (index >= length) {
    + length = index + 1;
    + }
    + data[i][j] = value;
    + }
    +
    + public int add(byte value) {
    + int i = length / chunkSize;
    + int j = length % chunkSize;
    + grow(i);
    + data[i][j] = value;
    + int result = length;
    + length += 1;
    + return result;
    + }
    +
    + /**
    + * Copy a slice of a byte array into our buffer.
    + * @param value the array to copy from
    + * @param valueOffset the first location to copy from value
    + * @param valueLength the number of bytes to copy from value
    + * @return the offset of the start of the copied bytes in this array
    + */
    + public int add(byte[] value, int valueOffset, int valueLength) {
    + int i = length / chunkSize;
    + int j = length % chunkSize;
    + grow((length + valueLength) / chunkSize);
    + int remaining = valueLength;
    + while (remaining > 0) {
    + int size = Math.min(remaining, chunkSize - j);
    + System.arraycopy(value, valueOffset, data[i], j, size);
    + remaining -= size;
    + valueOffset += size;
    + i += 1;
    + j = 0;
    + }
    + int result = length;
    + length += valueLength;
    + return result;
    + }
    +
    + /**
    + * Read the entire stream into this array.
    + * @param in the stream to read from
    + * @throws IOException
    + */
    + public void readAll(InputStream in) throws IOException {
    + int currentChunk = length / chunkSize;
    + int currentOffset = length % chunkSize;
    + grow(currentChunk);
    + int currentLength = in.read(data[currentChunk], currentOffset,
    + chunkSize - currentOffset);
    + while (currentLength > 0) {
    + length += currentLength;
    + currentOffset = length % chunkSize;
    + if (currentOffset == 0) {
    + currentChunk = length / chunkSize;
    + grow(currentChunk);
    + }
    + currentLength = in.read(data[currentChunk], currentOffset,
    + chunkSize - currentOffset);
    + }
    + }
    +
    + /**
    + * Byte compare a set of bytes against the bytes in this dynamic array.
    + * @param other source of the other bytes
    + * @param otherOffset start offset in the other array
    + * @param otherLength number of bytes in the other array
    + * @param ourOffset the offset in our array
    + * @param ourLength the number of bytes in our array
    + * @return negative for less, 0 for equal, positive for greater
    + */
    + public int compare(byte[] other, int otherOffset, int otherLength,
    + int ourOffset, int ourLength) {
    + int currentChunk = ourOffset / chunkSize;
    + int currentOffset = ourOffset % chunkSize;
    + int maxLength = Math.min(otherLength, ourLength);
    + while (maxLength > 0 &&
    + other[otherOffset] == data[currentChunk][currentOffset]) {
    + otherOffset += 1;
    + currentOffset += 1;
    + if (currentOffset == chunkSize) {
    + currentChunk += 1;
    + currentOffset = 0;
    + }
    + maxLength -= 1;
    + }
    + if (maxLength == 0) {
    + return otherLength - ourLength;
    + }
    + int otherByte = 0xff & other[otherOffset];
    + int ourByte = 0xff & data[currentChunk][currentOffset];
    + return otherByte > ourByte ? 1 : -1;
    + }
    +
    + /**
    + * Get the size of the array.
    + * @return the number of bytes in the array
    + */
    + public int size() {
    + return length;
    + }
    +
    + /**
    + * Clear the array to its original pristine state.
    + */
    + public void clear() {
    + length = 0;
    + for(int i=0; i < data.length; ++i) {
    + data[i] = null;
    + }
    + initializedChunks = 0;
    + }
    +
    + /**
    + * Set a text value from the bytes in this dynamic array.
    + * @param result the value to set
    + * @param offset the start of the bytes to copy
    + * @param length the number of bytes to copy
    + */
    + public void setText(Text result, int offset, int length) {
    + result.clear();
    + int currentChunk = offset / chunkSize;
    + int currentOffset = offset % chunkSize;
    + int currentLength = Math.min(length, chunkSize - currentOffset);
    + while (length > 0) {
    + result.append(data[currentChunk], currentOffset, currentLength);
    + length -= currentLength;
    + currentChunk += 1;
    + currentOffset = 0;
    + currentLength = Math.min(length, chunkSize - currentOffset);
    + }
    + }
    +
    + /**
    + * Write out a range of this dynamic array to an output stream.
    + * @param out the stream to write to
    + * @param offset the first offset to write
    + * @param length the number of bytes to write
    + * @throws IOException
    + */
    + public void write(OutputStream out, int offset,
    + int length) throws IOException {
    + int currentChunk = offset / chunkSize;
    + int currentOffset = offset % chunkSize;
    + while (length > 0) {
    + int currentLength = Math.min(length, chunkSize - currentOffset);
    + out.write(data[currentChunk], currentOffset, currentLength);
    + length -= currentLength;
    + currentChunk += 1;
    + currentOffset = 0;
    + }
    + }
    +
    + public String toString() {
    + StringBuilder sb = new StringBuilder(length * 3);
    +
    + sb.append('{');
    + // only print elements when the array is non-empty; otherwise get(0)
    + // would throw, and keep the last element in hex like the others
    + if (length > 0) {
    + int l = length - 1;
    + for (int i=0; i<l; i++) {
    + sb.append(Integer.toHexString(get(i)));
    + sb.append(',');
    + }
    + sb.append(Integer.toHexString(get(l)));
    + }
    + sb.append('}');
    +
    + return sb.toString();
    + }
    +
    + public void setByteBuffer(ByteBuffer result, int offset, int length) {
    + result.clear();
    + int currentChunk = offset / chunkSize;
    + int currentOffset = offset % chunkSize;
    + int currentLength = Math.min(length, chunkSize - currentOffset);
    + while (length > 0) {
    + result.put(data[currentChunk], currentOffset, currentLength);
    + length -= currentLength;
    + currentChunk += 1;
    + currentOffset = 0;
    + currentLength = Math.min(length, chunkSize - currentOffset);
    + }
    + }
    +}
    +
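    Typical use of DynamicByteArray is append-only: callers add() byte ranges, remember the returned offsets, and later copy slices back out into a Text or compare against them. A small sketch (string contents are illustrative):

        DynamicByteArray bytes = new DynamicByteArray();
        byte[] hello = "hello".getBytes();
        byte[] world = "world".getBytes();
        int helloOffset = bytes.add(hello, 0, hello.length);   // 0
        int worldOffset = bytes.add(world, 0, world.length);   // 5
        Text text = new Text();
        bytes.setText(text, helloOffset, hello.length);        // text is "hello"
        int cmp = bytes.compare(world, 0, world.length,
                                worldOffset, world.length);    // 0, identical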

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DynamicIntArray.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,139 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Dynamic int array that uses primitive types and chunks to avoid copying
    + * large numbers of integers when it resizes.
    + *
    + * The motivation for this class is memory optimization, i.e. space efficient
    + * storage of potentially huge arrays without good a-priori size guesses.
    + *
    + * The API of this class is between a primitive array and an AbstractList. It's
    + * not a Collection implementation because it handles primitive types, but the
    + * API could be extended to support iterators and the like.
    + *
    + * NOTE: Like standard Collection implementations/arrays, this class is not
    + * synchronized.
    + */
    +final class DynamicIntArray {
    + static final int DEFAULT_CHUNKSIZE = 8 * 1024;
    + static final int INIT_CHUNKS = 128;
    +
    + private final int chunkSize; // our allocation size
    + private int[][] data; // the real data
    + private int length; // max set element index +1
    + private int initializedChunks = 0; // the number of created chunks
    +
    + public DynamicIntArray() {
    + this(DEFAULT_CHUNKSIZE);
    + }
    +
    + public DynamicIntArray(int chunkSize) {
    + this.chunkSize = chunkSize;
    +
    + data = new int[INIT_CHUNKS][];
    + }
    +
    + /**
    + * Ensure that the given index is valid.
    + */
    + private void grow(int chunkIndex) {
    + if (chunkIndex >= initializedChunks) {
    + if (chunkIndex >= data.length) {
    + int newSize = Math.max(chunkIndex + 1, 2 * data.length);
    + int[][] newChunk = new int[newSize][];
    + System.arraycopy(data, 0, newChunk, 0, data.length);
    + data = newChunk;
    + }
    + for (int i=initializedChunks; i <= chunkIndex; ++i) {
    + data[i] = new int[chunkSize];
    + }
    + initializedChunks = chunkIndex + 1;
    + }
    + }
    +
    + public int get(int index) {
    + if (index >= length) {
    + throw new IndexOutOfBoundsException("Index " + index +
    + " is outside of 0.." +
    + (length - 1));
    + }
    + int i = index / chunkSize;
    + int j = index % chunkSize;
    + return data[i][j];
    + }
    +
    + public void set(int index, int value) {
    + int i = index / chunkSize;
    + int j = index % chunkSize;
    + grow(i);
    + if (index >= length) {
    + length = index + 1;
    + }
    + data[i][j] = value;
    + }
    +
    + public void increment(int index, int value) {
    + int i = index / chunkSize;
    + int j = index % chunkSize;
    + grow(i);
    + if (index >= length) {
    + length = index + 1;
    + }
    + data[i][j] += value;
    + }
    +
    + public void add(int value) {
    + int i = length / chunkSize;
    + int j = length % chunkSize;
    + grow(i);
    + data[i][j] = value;
    + length += 1;
    + }
    +
    + public int size() {
    + return length;
    + }
    +
    + public void clear() {
    + length = 0;
    + for(int i=0; i < data.length; ++i) {
    + data[i] = null;
    + }
    + initializedChunks = 0;
    + }
    +
    + public String toString() {
    + StringBuilder sb = new StringBuilder(length * 4);
    +
    + sb.append('{');
    + // only print elements when the array is non-empty; otherwise get(0)
    + // would throw an IndexOutOfBoundsException
    + if (length > 0) {
    + int l = length - 1;
    + for (int i=0; i<l; i++) {
    + sb.append(get(i));
    + sb.append(',');
    + }
    + sb.append(get(l));
    + }
    + sb.append('}');
    +
    + return sb.toString();
    + }
    +
    +}
    +
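    DynamicIntArray is used the same way, with increment() added so counters can be bumped in place. A short sketch:

        DynamicIntArray counts = new DynamicIntArray();
        counts.add(1);             // {1}
        counts.increment(0, 2);    // {3}
        counts.set(5, 10);         // grows on demand; indices 1..4 stay 0
        int size = counts.size();  // 6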

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,78 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.Path;
    +
    +/**
    + * A tool for printing out the file structure of ORC files.
    + */
    +public final class FileDump {
    +
    + // not used
    + private FileDump() {}
    +
    + public static void main(String[] args) throws Exception {
    + Configuration conf = new Configuration();
    + for(String filename: args) {
    + System.out.println("Structure for " + filename);
    + Path path = new Path(filename);
    + Reader reader = OrcFile.createReader(path.getFileSystem(conf), path);
    + RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
    + System.out.println("Rows: " + reader.getNumberOfRows());
    + System.out.println("Compression: " + reader.getCompression());
    + if (reader.getCompression() != CompressionKind.NONE) {
    + System.out.println("Compression size: " + reader.getCompressionSize());
    + }
    + System.out.println("Type: " + reader.getObjectInspector().getTypeName());
    + ColumnStatistics[] stats = reader.getStatistics();
    + System.out.println("\nStatistics:");
    + for(int i=0; i < stats.length; ++i) {
    + System.out.println(" Column " + i + ": " + stats[i].toString());
    + }
    + System.out.println("\nStripes:");
    + for(StripeInformation stripe: reader.getStripes()) {
    + long stripeStart = stripe.getOffset();
    + System.out.println(" Stripe: " + stripe.toString());
    + OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
    + long sectionStart = stripeStart;
    + for(OrcProto.Stream section: footer.getStreamsList()) {
    + System.out.println(" Stream: column " + section.getColumn() +
    + " section " + section.getKind() + " start: " + sectionStart +
    + " length " + section.getLength());
    + sectionStart += section.getLength();
    + }
    + for(int i=0; i < footer.getColumnsCount(); ++i) {
    + OrcProto.ColumnEncoding encoding = footer.getColumns(i);
    + StringBuilder buf = new StringBuilder();
    + buf.append(" Encoding column ");
    + buf.append(i);
    + buf.append(": ");
    + buf.append(encoding.getKind());
    + if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY) {
    + buf.append("[");
    + buf.append(encoding.getDictionarySize());
    + buf.append("]");
    + }
    + System.out.println(buf);
    + }
    + }
    + }
    + }
    +}
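    Because FileDump is a plain main() class, it can be run directly against one or more ORC files once the Hive and Hadoop jars are on the classpath, or invoked programmatically; the path below is illustrative:

        // print the rows, stripes, streams, and encodings of a local ORC file
        FileDump.main(new String[]{"/tmp/example.orc"});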

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,216 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.nio.ByteBuffer;
    +
    +abstract class InStream extends InputStream {
    +
    + private static class UncompressedStream extends InStream {
    + private final String name;
    + private byte[] array;
    + private int offset;
    + private final int base;
    + private final int limit;
    +
    + public UncompressedStream(String name, ByteBuffer input) {
    + this.name = name;
    + this.array = input.array();
    + base = input.arrayOffset() + input.position();
    + offset = base;
    + limit = input.arrayOffset() + input.limit();
    + }
    +
    + @Override
    + public int read() {
    + if (offset == limit) {
    + return -1;
    + }
    + return 0xff & array[offset++];
    + }
    +
    + @Override
    + public int read(byte[] data, int offset, int length) {
    + if (this.offset == limit) {
    + return -1;
    + }
    + int actualLength = Math.min(length, limit - this.offset);
    + System.arraycopy(array, this.offset, data, offset, actualLength);
    + this.offset += actualLength;
    + return actualLength;
    + }
    +
    + @Override
    + public int available() {
    + return limit - offset;
    + }
    +
    + @Override
    + public void close() {
    + array = null;
    + offset = 0;
    + }
    +
    + @Override
    + public void seek(PositionProvider index) throws IOException {
    + offset = base + (int) index.getNext();
    + }
    +
    + @Override
    + public String toString() {
    + return "uncompressed stream " + name + " base: " + base +
    + " offset: " + offset + " limit: " + limit;
    + }
    + }
    +
    + private static class CompressedStream extends InStream {
    + private final String name;
    + private byte[] array;
    + private final int bufferSize;
    + private ByteBuffer uncompressed = null;
    + private final CompressionCodec codec;
    + private int offset;
    + private final int base;
    + private final int limit;
    + private boolean isUncompressedOriginal;
    +
    + public CompressedStream(String name, ByteBuffer input,
    + CompressionCodec codec, int bufferSize
    + ) {
    + this.array = input.array();
    + this.name = name;
    + this.codec = codec;
    + this.bufferSize = bufferSize;
    + base = input.arrayOffset() + input.position();
    + offset = base;
    + limit = input.arrayOffset() + input.limit();
    + }
    +
    + private void readHeader() throws IOException {
    + if (limit - offset > OutStream.HEADER_SIZE) {
    + int chunkLength = ((0xff & array[offset + 2]) << 15) |
    + ((0xff & array[offset + 1]) << 7) | ((0xff & array[offset]) >> 1);
    + if (chunkLength > bufferSize) {
    + throw new IllegalArgumentException("Buffer size too small. size = " +
    + bufferSize + " needed = " + chunkLength);
    + }
    + boolean isOriginal = (array[offset] & 0x01) == 1;
    + offset += OutStream.HEADER_SIZE;
    + if (isOriginal) {
    + isUncompressedOriginal = true;
    + uncompressed = ByteBuffer.wrap(array, offset, chunkLength);
    + } else {
    + if (isUncompressedOriginal) {
    + uncompressed = ByteBuffer.allocate(bufferSize);
    + isUncompressedOriginal = false;
    + } else if (uncompressed == null) {
    + uncompressed = ByteBuffer.allocate(bufferSize);
    + } else {
    + uncompressed.clear();
    + }
    + codec.decompress(ByteBuffer.wrap(array, offset, chunkLength),
    + uncompressed);
    + }
    + offset += chunkLength;
    + } else {
    + throw new IllegalStateException("Can't read header");
    + }
    + }
    +
    + @Override
    + public int read() throws IOException {
    + if (uncompressed == null || uncompressed.remaining() == 0) {
    + if (offset == limit) {
    + return -1;
    + }
    + readHeader();
    + }
    + return 0xff & uncompressed.get();
    + }
    +
    + @Override
    + public int read(byte[] data, int offset, int length) throws IOException {
    + if (uncompressed == null || uncompressed.remaining() == 0) {
    + if (this.offset == this.limit) {
    + return -1;
    + }
    + readHeader();
    + }
    + int actualLength = Math.min(length, uncompressed.remaining());
    + System.arraycopy(uncompressed.array(),
    + uncompressed.arrayOffset() + uncompressed.position(), data,
    + offset, actualLength);
    + uncompressed.position(uncompressed.position() + actualLength);
    + return actualLength;
    + }
    +
    + @Override
    + public int available() throws IOException {
    + if (uncompressed == null || uncompressed.remaining() == 0) {
    + if (offset == limit) {
    + return 0;
    + }
    + readHeader();
    + }
    + return uncompressed.remaining();
    + }
    +
    + @Override
    + public void close() {
    + array = null;
    + uncompressed = null;
    + offset = 0;
    + }
    +
    + @Override
    + public void seek(PositionProvider index) throws IOException {
    + offset = base + (int) index.getNext();
    + int uncompBytes = (int) index.getNext();
    + if (uncompBytes != 0) {
    + readHeader();
    + uncompressed.position(uncompressed.position() + uncompBytes);
    + } else if (uncompressed != null) {
    + uncompressed.position(uncompressed.limit());
    + }
    + }
    +
    + @Override
    + public String toString() {
    + return "compressed stream " + name + " base: " + base +
    + " offset: " + offset + " limit: " + limit +
    + (uncompressed == null ? "" :
    + " uncompressed: " + uncompressed.position() + " to " +
    + uncompressed.limit());
    + }
    + }
    +
    + public abstract void seek(PositionProvider index) throws IOException;
    +
    + public static InStream create(String name,
    + ByteBuffer input,
    + CompressionCodec codec,
    + int bufferSize) throws IOException {
    + if (codec == null) {
    + return new UncompressedStream(name, input);
    + } else {
    + return new CompressedStream(name, input, codec, bufferSize);
    + }
    + }
    +}
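    InStream.create is the single entry point: a null codec yields the plain UncompressedStream, anything else wraps the buffer in the chunk-header-aware CompressedStream. A sketch of reading back an uncompressed, in-memory stream section (buffer contents are illustrative, and the code assumes it runs in a method that may throw IOException):

        ByteBuffer raw = ByteBuffer.wrap(new byte[]{1, 2, 3});
        InStream in = InStream.create("test", raw, null /* no codec */, 0);
        int first = in.read();           // 1
        int remaining = in.available();  // 2
        in.close();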

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/IntegerColumnStatistics.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,50 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +/**
    + * Statistics for all of the integer columns, such as byte, short, int, and
    + * long.
    + */
    +public interface IntegerColumnStatistics extends ColumnStatistics {
    + /**
    + * Get the smallest value in the column. Only defined if getNumberOfValues
    + * is non-zero.
    + * @return the minimum
    + */
    + long getMinimum();
    +
    + /**
    + * Get the largest value in the column. Only defined if getNumberOfValues
    + * is non-zero.
    + * @return the maximum
    + */
    + long getMaximum();
    +
    + /**
    + * Is the sum defined? If the sum overflowed the counter this will be false.
    + * @return is the sum available
    + */
    + boolean isSumDefined();
    +
    + /**
    + * Get the sum of the column. Only valid if isSumDefined returns true.
    + * @return the sum of the column
    + */
    + long getSum();
    +}
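    On the read side these interfaces are what Reader.getStatistics() hands back, as FileDump shows above; callers downcast by column type to reach min/max/sum. A sketch, assuming 'reader' came from OrcFile.createReader and that column 1 of the file holds integers:

        ColumnStatistics[] stats = reader.getStatistics();
        IntegerColumnStatistics intStats = (IntegerColumnStatistics) stats[1];
        long min = intStats.getMinimum();
        long max = intStats.getMaximum();
        if (intStats.isSumDefined()) {
          long sum = intStats.getSum();   // only meaningful when defined
        }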

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,82 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +
    +import java.io.IOException;
    +
    +/**
    + * Contains factory methods to read or write ORC files.
    + */
    +public final class OrcFile {
    +
    + public static final String MAGIC = "ORC";
    + public static final String COMPRESSION = "orc.compress";
    + static final String DEFAULT_COMPRESSION = "ZLIB";
    + public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
    + static final String DEFAULT_COMPRESSION_BLOCK_SIZE = "262144";
    + public static final String STRIPE_SIZE = "orc.stripe.size";
    + static final String DEFAULT_STRIPE_SIZE = "268435456";
    + public static final String ROW_INDEX_STRIDE = "orc.row.index.stride";
    + static final String DEFAULT_ROW_INDEX_STRIDE = "10000";
    + public static final String ENABLE_INDEXES = "orc.create.index";
    +
    + // unused
    + private OrcFile() {}
    +
    + /**
    + * Create an ORC file reader.
    + * @param fs file system
    + * @param path file name to read from
    + * @return a new ORC file reader.
    + * @throws IOException
    + */
    + public static Reader createReader(FileSystem fs, Path path
    + ) throws IOException {
    + return new ReaderImpl(fs, path);
    + }
    +
    + /**
    + * Create an ORC file writer.
    + * @param fs file system
    + * @param path filename to write to
    + * @param inspector the ObjectInspector that inspects the rows
    + * @param stripeSize the number of bytes in a stripe
    + * @param compress how to compress the file
    + * @param bufferSize the number of bytes to compress at once
    + * @param rowIndexStride the number of rows between row index entries or
    + * 0 to suppress all indexes
    + * @return a new ORC file writer
    + * @throws IOException
    + */
    + public static Writer createWriter(FileSystem fs,
    + Path path,
    + ObjectInspector inspector,
    + long stripeSize,
    + CompressionKind compress,
    + int bufferSize,
    + int rowIndexStride) throws IOException {
    + return new WriterImpl(fs, path, inspector, stripeSize, compress,
    + bufferSize, rowIndexStride);
    + }
    +
    +}
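    Putting the factory methods together, a write/read round trip looks roughly like the sketch below; it reuses the empty-struct inspector trick from OrcOutputFormat, and the numeric arguments mirror the defaults declared above (path and settings are illustrative):

        static long roundTrip(Configuration conf) throws IOException {
          Path path = new Path("/tmp/example.orc");
          FileSystem fs = path.getFileSystem(conf);
          ObjectInspector inspector = ObjectInspectorFactory
              .getStandardStructObjectInspector(new ArrayList<String>(),
                                                new ArrayList<ObjectInspector>());
          Writer writer = OrcFile.createWriter(fs, path, inspector,
              268435456L, CompressionKind.ZLIB, 262144, 10000);
          writer.close();                          // no rows added
          Reader reader = OrcFile.createReader(fs, path);
          return reader.getNumberOfRows();         // 0 for this empty file
        }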

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,192 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileStatus;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.ql.io.InputFormatChecker;
    +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
    +import org.apache.hadoop.io.NullWritable;
    +import org.apache.hadoop.mapred.FileInputFormat;
    +import org.apache.hadoop.mapred.FileSplit;
    +import org.apache.hadoop.mapred.InputSplit;
    +import org.apache.hadoop.mapred.JobConf;
    +import org.apache.hadoop.mapred.RecordReader;
    +import org.apache.hadoop.mapred.Reporter;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +/**
    + * A MapReduce/Hive input format for ORC files.
    + */
    +public class OrcInputFormat extends FileInputFormat<NullWritable, OrcStruct>
    + implements InputFormatChecker {
    +
    + private static class OrcRecordReader
    + implements RecordReader<NullWritable, OrcStruct> {
    + private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
    + private final long offset;
    + private final long length;
    + private final OrcStruct row;
    + private boolean firstRow = true;
    + private float progress = 0.0f;
    +
    + OrcRecordReader(Reader file, Configuration conf,
    + long offset, long length) throws IOException {
    + this.reader = file.rows(offset, length,
    + findIncludedColumns(file.getTypes(), conf));
    + this.offset = offset;
    + this.length = length;
    + if (reader.hasNext()) {
    + row = (OrcStruct) reader.next(null);
    + } else {
    + row = null;
    + }
    + }
    +
    + @Override
    + public boolean next(NullWritable key, OrcStruct value) throws IOException {
    + if (firstRow) {
    + firstRow = false;
    + assert value == row: "User didn't pass our value back " + value;
    + return row != null;
    + } else if (reader.hasNext()) {
    + Object obj = reader.next(value);
    + progress = reader.getProgress();
    + assert obj == value : "Reader returned different object " + obj;
    + return true;
    + }
    + return false;
    + }
    +
    + @Override
    + public NullWritable createKey() {
    + return NullWritable.get();
    + }
    +
    + @Override
    + public OrcStruct createValue() {
    + return row;
    + }
    +
    + @Override
    + public long getPos() throws IOException {
    + return offset + (long) (progress * length);
    + }
    +
    + @Override
    + public void close() throws IOException {
    + reader.close();
    + }
    +
    + @Override
    + public float getProgress() throws IOException {
    + return progress;
    + }
    + }
    +
    + public OrcInputFormat() {
    + // just set a really small lower bound
    + setMinSplitSize(16 * 1024);
    + }
    +
    + /**
    + * Recurse down into a type subtree turning on all of the sub-columns.
    + * @param types the types of the file
    + * @param result the global view of columns that should be included
    + * @param typeId the root of tree to enable
    + */
    + private static void includeColumnRecursive(List<OrcProto.Type> types,
    + boolean[] result,
    + int typeId) {
    + result[typeId] = true;
    + OrcProto.Type type = types.get(typeId);
    + int children = type.getSubtypesCount();
    + for(int i=0; i < children; ++i) {
    + includeColumnRecursive(types, result, type.getSubtypes(i));
    + }
    + }
    +
    + /**
    + * Take the configuration and figure out which columns we need to include.
    + * @param types the types of the file
    + * @param conf the configuration
    + * @return true for each column that should be included
    + */
    + private static boolean[] findIncludedColumns(List<OrcProto.Type> types,
    + Configuration conf) {
    + String includedStr =
    + conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
    + if (includedStr == null) {
    + return null;
    + } else {
    + int numColumns = types.size();
    + boolean[] result = new boolean[numColumns];
    + result[0] = true;
    + OrcProto.Type root = types.get(0);
    + List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
    + for(int i=0; i < root.getSubtypesCount(); ++i) {
    + if (included.contains(i)) {
    + includeColumnRecursive(types, result, root.getSubtypes(i));
    + }
    + }
    + // if we are filtering at least one column, return the boolean array
    + for(boolean include: result) {
    + if (!include) {
    + return result;
    + }
    + }
    + return null;
    + }
    + }
    +
    + @Override
    + public RecordReader<NullWritable, OrcStruct>
    + getRecordReader(InputSplit inputSplit, JobConf conf,
    + Reporter reporter) throws IOException {
    + FileSplit fileSplit = (FileSplit) inputSplit;
    + Path path = fileSplit.getPath();
    + FileSystem fs = path.getFileSystem(conf);
    + reporter.setStatus(fileSplit.toString());
    + return new OrcRecordReader(OrcFile.createReader(fs, path), conf,
    + fileSplit.getStart(), fileSplit.getLength());
    + }
    +
    + @Override
    + public boolean validateInput(FileSystem fs, HiveConf conf,
    + ArrayList<FileStatus> files
    + ) throws IOException {
    + if (files.size() <= 0) {
    + return false;
    + }
    + for (FileStatus file : files) {
    + try {
    + OrcFile.createReader(fs, file.getPath());
    + } catch (IOException e) {
    + return false;
    + }
    + }
    + return true;
    + }
    +}
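    Wiring the input format into a plain MapReduce job only needs the usual JobConf calls; column pruning is driven by the read-column-IDs property that findIncludedColumns consults. A configuration sketch (the input path is illustrative and the rest of the job setup is omitted):

        JobConf job = new JobConf();
        job.setInputFormat(OrcInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("/warehouse/t"));
        // read only top-level columns 0 and 2; their sub-columns follow
        job.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");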

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,144 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
    +import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
    +import org.apache.hadoop.hive.ql.io.orc.OrcSerde.OrcSerdeRow;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.apache.hadoop.io.NullWritable;
    +import org.apache.hadoop.io.Writable;
    +import org.apache.hadoop.mapred.FileOutputFormat;
    +import org.apache.hadoop.mapred.JobConf;
    +import org.apache.hadoop.mapred.RecordWriter;
    +import org.apache.hadoop.mapred.Reporter;
    +import org.apache.hadoop.util.Progressable;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.Properties;
    +
    +/**
    + * A Hive OutputFormat for ORC files.
    + */
    +public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
    + implements HiveOutputFormat<NullWritable, OrcSerdeRow> {
    +
    + private static class OrcRecordWriter
    + implements RecordWriter<NullWritable, OrcSerdeRow>,
    + FileSinkOperator.RecordWriter {
    + private Writer writer = null;
    + private final FileSystem fs;
    + private final Path path;
    + private final Configuration conf;
    + private final long stripeSize;
    + private final int compressionSize;
    + private final CompressionKind compress;
    + private final int rowIndexStride;
    +
    + OrcRecordWriter(FileSystem fs, Path path, Configuration conf,
    + String stripeSize, String compress,
    + String compressionSize, String rowIndexStride) {
    + this.fs = fs;
    + this.path = path;
    + this.conf = conf;
    + this.stripeSize = Long.valueOf(stripeSize);
    + this.compress = CompressionKind.valueOf(compress);
    + this.compressionSize = Integer.valueOf(compressionSize);
    + this.rowIndexStride = Integer.valueOf(rowIndexStride);
    + }
    +
    + @Override
    + public void write(NullWritable nullWritable,
    + OrcSerdeRow row) throws IOException {
    + if (writer == null) {
    + writer = OrcFile.createWriter(fs, path, row.getInspector(), stripeSize,
    + compress, compressionSize, rowIndexStride);
    + }
    + writer.addRow(row.getRow());
    + }
    +
    + @Override
    + public void write(Writable row) throws IOException {
    + OrcSerdeRow serdeRow = (OrcSerdeRow) row;
    + if (writer == null) {
    + writer = OrcFile.createWriter(fs, path, serdeRow.getInspector(),
    + stripeSize, compress, compressionSize, rowIndexStride);
    + }
    + writer.addRow(serdeRow.getRow());
    + }
    +
    + @Override
    + public void close(Reporter reporter) throws IOException {
    + close(true);
    + }
    +
    + @Override
    + public void close(boolean b) throws IOException {
    + // if we haven't written any rows, we need to create a file with a
    + // generic schema.
    + if (writer == null) {
    + // a row with no columns
    + ObjectInspector inspector = ObjectInspectorFactory.
    + getStandardStructObjectInspector(new ArrayList<String>(),
    + new ArrayList<ObjectInspector>());
    + writer = OrcFile.createWriter(fs, path, inspector, stripeSize,
    + compress, compressionSize, rowIndexStride);
    + }
    + writer.close();
    + }
    + }
    +
    + @Override
    + public RecordWriter<NullWritable, OrcSerdeRow>
    + getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
    + Progressable reporter) throws IOException {
    + return new OrcRecordWriter(fileSystem, new Path(name), conf,
    + OrcFile.DEFAULT_STRIPE_SIZE, OrcFile.DEFAULT_COMPRESSION,
    + OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE, OrcFile.DEFAULT_ROW_INDEX_STRIDE);
    + }
    +
    + @Override
    + public FileSinkOperator.RecordWriter
    + getHiveRecordWriter(JobConf conf,
    + Path path,
    + Class<? extends Writable> valueClass,
    + boolean isCompressed,
    + Properties tableProperties,
    + Progressable reporter) throws IOException {
    + String stripeSize = tableProperties.getProperty(OrcFile.STRIPE_SIZE,
    + OrcFile.DEFAULT_STRIPE_SIZE);
    + String compression = tableProperties.getProperty(OrcFile.COMPRESSION,
    + OrcFile.DEFAULT_COMPRESSION);
    + String compressionSize =
    + tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE,
    + OrcFile.DEFAULT_COMPRESSION_BLOCK_SIZE);
    + String rowIndexStride =
    + tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE,
    + OrcFile.DEFAULT_ROW_INDEX_STRIDE);
    + if ("false".equals(tableProperties.getProperty(OrcFile.ENABLE_INDEXES))) {
    + rowIndexStride = "0";
    + }
    + return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
    + stripeSize, compression, compressionSize, rowIndexStride);
    + }
    +}
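    On the Hive side the writer is configured entirely through table properties, using the orc.* keys defined in OrcFile; getHiveRecordWriter falls back to the defaults for any key that is absent. A sketch of the Properties a table definition might carry (values are illustrative):

        Properties tableProperties = new Properties();
        tableProperties.setProperty(OrcFile.COMPRESSION, "SNAPPY");
        tableProperties.setProperty(OrcFile.STRIPE_SIZE, "67108864");        // 64MB stripes
        tableProperties.setProperty(OrcFile.COMPRESSION_BLOCK_SIZE, "131072");
        tableProperties.setProperty(OrcFile.ROW_INDEX_STRIDE, "20000");
        tableProperties.setProperty(OrcFile.ENABLE_INDEXES, "true");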

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,132 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.hive.serde2.SerDe;
    +import org.apache.hadoop.hive.serde2.SerDeException;
    +import org.apache.hadoop.hive.serde2.SerDeStats;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    +import org.apache.hadoop.io.Writable;
    +
    +import java.io.DataInput;
    +import java.io.DataOutput;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.Properties;
    +
    +/**
    + * A serde class for ORC.
    + * It transparently passes the object to/from the ORC file reader/writer.
    + */
    +public class OrcSerde implements SerDe {
    + private final OrcSerdeRow row = new OrcSerdeRow();
    + private ObjectInspector inspector = null;
    +
    + final class OrcSerdeRow implements Writable {
    + private Object realRow;
    + private ObjectInspector inspector;
    +
    + @Override
    + public void write(DataOutput dataOutput) throws IOException {
    + throw new UnsupportedOperationException("can't write the bundle");
    + }
    +
    + @Override
    + public void readFields(DataInput dataInput) throws IOException {
    + throw new UnsupportedOperationException("can't read the bundle");
    + }
    +
    + ObjectInspector getInspector() {
    + return inspector;
    + }
    +
    + Object getRow() {
    + return realRow;
    + }
    + }
    +
    + @Override
    + public void initialize(Configuration conf, Properties table) {
    + // Read the configuration parameters
    + String columnNameProperty = table.getProperty("columns");
    + // NOTE: if "columns.types" is missing, all columns will be of String type
    + String columnTypeProperty = table.getProperty("columns.types");
    +
    + // Parse the configuration parameters
    + ArrayList<String> columnNames = new ArrayList<String>();
    + if (columnNameProperty != null && columnNameProperty.length() > 0) {
    + for(String name: columnNameProperty.split(",")) {
    + columnNames.add(name);
    + }
    + }
    + if (columnTypeProperty == null) {
    + // Default type: all string
    + StringBuilder sb = new StringBuilder();
    + for (int i = 0; i < columnNames.size(); i++) {
    + if (i > 0) {
    + sb.append(":");
    + }
    + sb.append("string");
    + }
    + columnTypeProperty = sb.toString();
    + }
    +
    + ArrayList<TypeInfo> fieldTypes =
    + TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    + StructTypeInfo rootType = new StructTypeInfo();
    + rootType.setAllStructFieldNames(columnNames);
    + rootType.setAllStructFieldTypeInfos(fieldTypes);
    + inspector = OrcStruct.createObjectInspector(rootType);
    + }
    +
    + @Override
    + public Class<? extends Writable> getSerializedClass() {
    + return OrcSerdeRow.class;
    + }
    +
    + @Override
    + public Writable serialize(Object realRow, ObjectInspector inspector) {
    + row.realRow = realRow;
    + row.inspector = inspector;
    + return row;
    + }
    +
    + @Override
    + public Object deserialize(Writable writable) throws SerDeException {
    + return writable;
    + }
    +
    + @Override
    + public ObjectInspector getObjectInspector() throws SerDeException {
    + return inspector;
    + }
    +
    + /**
    + * Always returns null, since serialized size doesn't make sense in the
    + * context of ORC files.
    + * @return null
    + */
    + @Override
    + public SerDeStats getSerDeStats() {
    + return null;
    + }
    +}
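
With the whole SerDe now in view, a minimal usage sketch may help; it is not part of the commit, and the two-column table, the row values, and the class name OrcSerdeSketch are invented for illustration. initialize() parses the table's "columns" and "columns.types" properties into a struct inspector, and serialize() copies nothing: it only bundles the row together with the inspector that describes it, so OrcOutputFormat can unwrap both later. In real use Hive drives these calls itself.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Properties;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Writable;

    public class OrcSerdeSketch {
      public static void main(String[] args) throws Exception {
        // Table properties as Hive would pass them to initialize();
        // the column layout here is made up for illustration.
        Properties tbl = new Properties();
        tbl.setProperty("columns", "name,age");
        tbl.setProperty("columns.types", "string:int");

        OrcSerde serde = new OrcSerde();
        serde.initialize(new Configuration(), tbl);

        // The table-level inspector built from the properties above.
        System.out.println(serde.getObjectInspector().getTypeName());

        // An inspector describing the in-memory row handed to serialize().
        List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
        fieldInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        ObjectInspector rowInspector =
            ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("name", "age"), fieldInspectors);

        // serialize() just wraps the row and its inspector in an OrcSerdeRow.
        Writable bundle = serde.serialize(Arrays.asList((Object) "alice", 30), rowInspector);
        System.out.println(bundle.getClass().getSimpleName());   // OrcSerdeRow
      }
    }
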
    Added: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (added)
    +++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,9811 @@
    +// Generated by the protocol buffer compiler. DO NOT EDIT!
    +// source: orc_proto.proto
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +public final class OrcProto {
    + private OrcProto() {}
    + public static void registerAllExtensions(
    + com.google.protobuf.ExtensionRegistry registry) {
    + }
    + public enum CompressionKind
    + implements com.google.protobuf.ProtocolMessageEnum {
    + NONE(0, 0),
    + ZLIB(1, 1),
    + SNAPPY(2, 2),
    + LZO(3, 3),
    + ;
    +
    + public static final int NONE_VALUE = 0;
    + public static final int ZLIB_VALUE = 1;
    + public static final int SNAPPY_VALUE = 2;
    + public static final int LZO_VALUE = 3;
    +
    +
    + public final int getNumber() { return value; }
    +
    + public static CompressionKind valueOf(int value) {
    + switch (value) {
    + case 0: return NONE;
    + case 1: return ZLIB;
    + case 2: return SNAPPY;
    + case 3: return LZO;
    + default: return null;
    + }
    + }
    +
    + public static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
    + internalGetValueMap() {
    + return internalValueMap;
    + }
    + private static com.google.protobuf.Internal.EnumLiteMap<CompressionKind>
    + internalValueMap =
    + new com.google.protobuf.Internal.EnumLiteMap<CompressionKind>() {
    + public CompressionKind findValueByNumber(int number) {
    + return CompressionKind.valueOf(number);
    + }
    + };
    +
    + public final com.google.protobuf.Descriptors.EnumValueDescriptor
    + getValueDescriptor() {
    + return getDescriptor().getValues().get(index);
    + }
    + public final com.google.protobuf.Descriptors.EnumDescriptor
    + getDescriptorForType() {
    + return getDescriptor();
    + }
    + public static final com.google.protobuf.Descriptors.EnumDescriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.getDescriptor().getEnumTypes().get(0);
    + }
    +
    + private static final CompressionKind[] VALUES = {
    + NONE, ZLIB, SNAPPY, LZO,
    + };
    +
    + public static CompressionKind valueOf(
    + com.google.protobuf.Descriptors.EnumValueDescriptor desc) {
    + if (desc.getType() != getDescriptor()) {
    + throw new java.lang.IllegalArgumentException(
    + "EnumValueDescriptor is not for this type.");
    + }
    + return VALUES[desc.getIndex()];
    + }
    +
    + private final int index;
    + private final int value;
    +
    + private CompressionKind(int index, int value) {
    + this.index = index;
    + this.value = value;
    + }
    +
    + // @@protoc_insertion_point(enum_scope:org.apache.hadoop.hive.ql.io.orc.CompressionKind)
    + }
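
The enum above carries both a descriptor index and the wire number declared in orc_proto.proto, and valueOf(int) maps a wire number back to its constant, returning null for numbers it does not know. A tiny hypothetical check of that mapping, assuming the generated OrcProto class is on the classpath:

    // Wire numbers per orc_proto.proto: NONE=0, ZLIB=1, SNAPPY=2, LZO=3.
    OrcProto.CompressionKind kind = OrcProto.CompressionKind.valueOf(2);
    assert kind == OrcProto.CompressionKind.SNAPPY;
    assert kind.getNumber() == 2;
    // Unknown wire numbers come back as null rather than throwing.
    assert OrcProto.CompressionKind.valueOf(99) == null;
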
    +
    + public interface IntegerStatisticsOrBuilder
    + extends com.google.protobuf.MessageOrBuilder {
    +
    + // optional sint64 minimum = 1;
    + boolean hasMinimum();
    + long getMinimum();
    +
    + // optional sint64 maximum = 2;
    + boolean hasMaximum();
    + long getMaximum();
    +
    + // optional sint64 sum = 3;
    + boolean hasSum();
    + long getSum();
    + }
    + public static final class IntegerStatistics extends
    + com.google.protobuf.GeneratedMessage
    + implements IntegerStatisticsOrBuilder {
    + // Use IntegerStatistics.newBuilder() to construct.
    + private IntegerStatistics(Builder builder) {
    + super(builder);
    + }
    + private IntegerStatistics(boolean noInit) {}
    +
    + private static final IntegerStatistics defaultInstance;
    + public static IntegerStatistics getDefaultInstance() {
    + return defaultInstance;
    + }
    +
    + public IntegerStatistics getDefaultInstanceForType() {
    + return defaultInstance;
    + }
    +
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_IntegerStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_IntegerStatistics_fieldAccessorTable;
    + }
    +
    + private int bitField0_;
    + // optional sint64 minimum = 1;
    + public static final int MINIMUM_FIELD_NUMBER = 1;
    + private long minimum_;
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public long getMinimum() {
    + return minimum_;
    + }
    +
    + // optional sint64 maximum = 2;
    + public static final int MAXIMUM_FIELD_NUMBER = 2;
    + private long maximum_;
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public long getMaximum() {
    + return maximum_;
    + }
    +
    + // optional sint64 sum = 3;
    + public static final int SUM_FIELD_NUMBER = 3;
    + private long sum_;
    + public boolean hasSum() {
    + return ((bitField0_ & 0x00000004) == 0x00000004);
    + }
    + public long getSum() {
    + return sum_;
    + }
    +
    + private void initFields() {
    + minimum_ = 0L;
    + maximum_ = 0L;
    + sum_ = 0L;
    + }
    + private byte memoizedIsInitialized = -1;
    + public final boolean isInitialized() {
    + byte isInitialized = memoizedIsInitialized;
    + if (isInitialized != -1) return isInitialized == 1;
    +
    + memoizedIsInitialized = 1;
    + return true;
    + }
    +
    + public void writeTo(com.google.protobuf.CodedOutputStream output)
    + throws java.io.IOException {
    + getSerializedSize();
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + output.writeSInt64(1, minimum_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + output.writeSInt64(2, maximum_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + output.writeSInt64(3, sum_);
    + }
    + getUnknownFields().writeTo(output);
    + }
    +
    + private int memoizedSerializedSize = -1;
    + public int getSerializedSize() {
    + int size = memoizedSerializedSize;
    + if (size != -1) return size;
    +
    + size = 0;
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeSInt64Size(1, minimum_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeSInt64Size(2, maximum_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeSInt64Size(3, sum_);
    + }
    + size += getUnknownFields().getSerializedSize();
    + memoizedSerializedSize = size;
    + return size;
    + }
    +
    + private static final long serialVersionUID = 0L;
    + @java.lang.Override
    + protected java.lang.Object writeReplace()
    + throws java.io.ObjectStreamException {
    + return super.writeReplace();
    + }
    +
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + com.google.protobuf.ByteString data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + com.google.protobuf.ByteString data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(byte[] data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + byte[] data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseDelimitedFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseDelimitedFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    +
    + public static Builder newBuilder() { return Builder.create(); }
    + public Builder newBuilderForType() { return newBuilder(); }
    + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics prototype) {
    + return newBuilder().mergeFrom(prototype);
    + }
    + public Builder toBuilder() { return newBuilder(this); }
    +
    + @java.lang.Override
    + protected Builder newBuilderForType(
    + com.google.protobuf.GeneratedMessage.BuilderParent parent) {
    + Builder builder = new Builder(parent);
    + return builder;
    + }
    + public static final class Builder extends
    + com.google.protobuf.GeneratedMessage.Builder<Builder>
    + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatisticsOrBuilder {
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_IntegerStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_IntegerStatistics_fieldAccessorTable;
    + }
    +
    + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.newBuilder()
    + private Builder() {
    + maybeForceBuilderInitialization();
    + }
    +
    + private Builder(BuilderParent parent) {
    + super(parent);
    + maybeForceBuilderInitialization();
    + }
    + private void maybeForceBuilderInitialization() {
    + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
    + }
    + }
    + private static Builder create() {
    + return new Builder();
    + }
    +
    + public Builder clear() {
    + super.clear();
    + minimum_ = 0L;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + maximum_ = 0L;
    + bitField0_ = (bitField0_ & ~0x00000002);
    + sum_ = 0L;
    + bitField0_ = (bitField0_ & ~0x00000004);
    + return this;
    + }
    +
    + public Builder clone() {
    + return create().mergeFrom(buildPartial());
    + }
    +
    + public com.google.protobuf.Descriptors.Descriptor
    + getDescriptorForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDescriptor();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics getDefaultInstanceForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics build() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(result);
    + }
    + return result;
    + }
    +
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics buildParsed()
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(
    + result).asInvalidProtocolBufferException();
    + }
    + return result;
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics buildPartial() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics(this);
    + int from_bitField0_ = bitField0_;
    + int to_bitField0_ = 0;
    + if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
    + to_bitField0_ |= 0x00000001;
    + }
    + result.minimum_ = minimum_;
    + if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
    + to_bitField0_ |= 0x00000002;
    + }
    + result.maximum_ = maximum_;
    + if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
    + to_bitField0_ |= 0x00000004;
    + }
    + result.sum_ = sum_;
    + result.bitField0_ = to_bitField0_;
    + onBuilt();
    + return result;
    + }
    +
    + public Builder mergeFrom(com.google.protobuf.Message other) {
    + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics) {
    + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics)other);
    + } else {
    + super.mergeFrom(other);
    + return this;
    + }
    + }
    +
    + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics other) {
    + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance()) return this;
    + if (other.hasMinimum()) {
    + setMinimum(other.getMinimum());
    + }
    + if (other.hasMaximum()) {
    + setMaximum(other.getMaximum());
    + }
    + if (other.hasSum()) {
    + setSum(other.getSum());
    + }
    + this.mergeUnknownFields(other.getUnknownFields());
    + return this;
    + }
    +
    + public final boolean isInitialized() {
    + return true;
    + }
    +
    + public Builder mergeFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + com.google.protobuf.UnknownFieldSet.Builder unknownFields =
    + com.google.protobuf.UnknownFieldSet.newBuilder(
    + this.getUnknownFields());
    + while (true) {
    + int tag = input.readTag();
    + switch (tag) {
    + case 0:
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + default: {
    + if (!parseUnknownField(input, unknownFields,
    + extensionRegistry, tag)) {
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + }
    + break;
    + }
    + case 8: {
    + bitField0_ |= 0x00000001;
    + minimum_ = input.readSInt64();
    + break;
    + }
    + case 16: {
    + bitField0_ |= 0x00000002;
    + maximum_ = input.readSInt64();
    + break;
    + }
    + case 24: {
    + bitField0_ |= 0x00000004;
    + sum_ = input.readSInt64();
    + break;
    + }
    + }
    + }
    + }
    +
    + private int bitField0_;
    +
    + // optional sint64 minimum = 1;
    + private long minimum_ ;
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public long getMinimum() {
    + return minimum_;
    + }
    + public Builder setMinimum(long value) {
    + bitField0_ |= 0x00000001;
    + minimum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMinimum() {
    + bitField0_ = (bitField0_ & ~0x00000001);
    + minimum_ = 0L;
    + onChanged();
    + return this;
    + }
    +
    + // optional sint64 maximum = 2;
    + private long maximum_ ;
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public long getMaximum() {
    + return maximum_;
    + }
    + public Builder setMaximum(long value) {
    + bitField0_ |= 0x00000002;
    + maximum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMaximum() {
    + bitField0_ = (bitField0_ & ~0x00000002);
    + maximum_ = 0L;
    + onChanged();
    + return this;
    + }
    +
    + // optional sint64 sum = 3;
    + private long sum_ ;
    + public boolean hasSum() {
    + return ((bitField0_ & 0x00000004) == 0x00000004);
    + }
    + public long getSum() {
    + return sum_;
    + }
    + public Builder setSum(long value) {
    + bitField0_ |= 0x00000004;
    + sum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearSum() {
    + bitField0_ = (bitField0_ & ~0x00000004);
    + sum_ = 0L;
    + onChanged();
    + return this;
    + }
    +
    + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.IntegerStatistics)
    + }
    +
    + static {
    + defaultInstance = new IntegerStatistics(true);
    + defaultInstance.initFields();
    + }
    +
    + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.IntegerStatistics)
    + }
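
DoubleStatistics, StringStatistics, and the other messages below repeat the same generated shape, so one hypothetical round trip through IntegerStatistics is enough to show the builder API in use (the statistics values are invented):

    // Build the message with the generated builder, then serialize and re-parse it.
    OrcProto.IntegerStatistics stats = OrcProto.IntegerStatistics.newBuilder()
        .setMinimum(-5)
        .setMaximum(42)
        .setSum(137)
        .build();
    byte[] bytes = stats.toByteArray();
    OrcProto.IntegerStatistics copy = OrcProto.IntegerStatistics.parseFrom(bytes);
    // All three optional sint64 fields survive the round trip.
    assert copy.hasMinimum() && copy.getMinimum() == -5;
    assert copy.hasMaximum() && copy.getMaximum() == 42;
    assert copy.hasSum() && copy.getSum() == 137;
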
    +
    + public interface DoubleStatisticsOrBuilder
    + extends com.google.protobuf.MessageOrBuilder {
    +
    + // optional double minimum = 1;
    + boolean hasMinimum();
    + double getMinimum();
    +
    + // optional double maximum = 2;
    + boolean hasMaximum();
    + double getMaximum();
    +
    + // optional double sum = 3;
    + boolean hasSum();
    + double getSum();
    + }
    + public static final class DoubleStatistics extends
    + com.google.protobuf.GeneratedMessage
    + implements DoubleStatisticsOrBuilder {
    + // Use DoubleStatistics.newBuilder() to construct.
    + private DoubleStatistics(Builder builder) {
    + super(builder);
    + }
    + private DoubleStatistics(boolean noInit) {}
    +
    + private static final DoubleStatistics defaultInstance;
    + public static DoubleStatistics getDefaultInstance() {
    + return defaultInstance;
    + }
    +
    + public DoubleStatistics getDefaultInstanceForType() {
    + return defaultInstance;
    + }
    +
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DoubleStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DoubleStatistics_fieldAccessorTable;
    + }
    +
    + private int bitField0_;
    + // optional double minimum = 1;
    + public static final int MINIMUM_FIELD_NUMBER = 1;
    + private double minimum_;
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public double getMinimum() {
    + return minimum_;
    + }
    +
    + // optional double maximum = 2;
    + public static final int MAXIMUM_FIELD_NUMBER = 2;
    + private double maximum_;
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public double getMaximum() {
    + return maximum_;
    + }
    +
    + // optional double sum = 3;
    + public static final int SUM_FIELD_NUMBER = 3;
    + private double sum_;
    + public boolean hasSum() {
    + return ((bitField0_ & 0x00000004) == 0x00000004);
    + }
    + public double getSum() {
    + return sum_;
    + }
    +
    + private void initFields() {
    + minimum_ = 0D;
    + maximum_ = 0D;
    + sum_ = 0D;
    + }
    + private byte memoizedIsInitialized = -1;
    + public final boolean isInitialized() {
    + byte isInitialized = memoizedIsInitialized;
    + if (isInitialized != -1) return isInitialized == 1;
    +
    + memoizedIsInitialized = 1;
    + return true;
    + }
    +
    + public void writeTo(com.google.protobuf.CodedOutputStream output)
    + throws java.io.IOException {
    + getSerializedSize();
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + output.writeDouble(1, minimum_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + output.writeDouble(2, maximum_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + output.writeDouble(3, sum_);
    + }
    + getUnknownFields().writeTo(output);
    + }
    +
    + private int memoizedSerializedSize = -1;
    + public int getSerializedSize() {
    + int size = memoizedSerializedSize;
    + if (size != -1) return size;
    +
    + size = 0;
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeDoubleSize(1, minimum_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeDoubleSize(2, maximum_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeDoubleSize(3, sum_);
    + }
    + size += getUnknownFields().getSerializedSize();
    + memoizedSerializedSize = size;
    + return size;
    + }
    +
    + private static final long serialVersionUID = 0L;
    + @java.lang.Override
    + protected java.lang.Object writeReplace()
    + throws java.io.ObjectStreamException {
    + return super.writeReplace();
    + }
    +
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + com.google.protobuf.ByteString data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + com.google.protobuf.ByteString data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(byte[] data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + byte[] data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseDelimitedFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseDelimitedFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    +
    + public static Builder newBuilder() { return Builder.create(); }
    + public Builder newBuilderForType() { return newBuilder(); }
    + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics prototype) {
    + return newBuilder().mergeFrom(prototype);
    + }
    + public Builder toBuilder() { return newBuilder(this); }
    +
    + @java.lang.Override
    + protected Builder newBuilderForType(
    + com.google.protobuf.GeneratedMessage.BuilderParent parent) {
    + Builder builder = new Builder(parent);
    + return builder;
    + }
    + public static final class Builder extends
    + com.google.protobuf.GeneratedMessage.Builder<Builder>
    + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatisticsOrBuilder {
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DoubleStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_DoubleStatistics_fieldAccessorTable;
    + }
    +
    + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.newBuilder()
    + private Builder() {
    + maybeForceBuilderInitialization();
    + }
    +
    + private Builder(BuilderParent parent) {
    + super(parent);
    + maybeForceBuilderInitialization();
    + }
    + private void maybeForceBuilderInitialization() {
    + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
    + }
    + }
    + private static Builder create() {
    + return new Builder();
    + }
    +
    + public Builder clear() {
    + super.clear();
    + minimum_ = 0D;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + maximum_ = 0D;
    + bitField0_ = (bitField0_ & ~0x00000002);
    + sum_ = 0D;
    + bitField0_ = (bitField0_ & ~0x00000004);
    + return this;
    + }
    +
    + public Builder clone() {
    + return create().mergeFrom(buildPartial());
    + }
    +
    + public com.google.protobuf.Descriptors.Descriptor
    + getDescriptorForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDescriptor();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics getDefaultInstanceForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDefaultInstance();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics build() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(result);
    + }
    + return result;
    + }
    +
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics buildParsed()
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(
    + result).asInvalidProtocolBufferException();
    + }
    + return result;
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics buildPartial() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics(this);
    + int from_bitField0_ = bitField0_;
    + int to_bitField0_ = 0;
    + if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
    + to_bitField0_ |= 0x00000001;
    + }
    + result.minimum_ = minimum_;
    + if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
    + to_bitField0_ |= 0x00000002;
    + }
    + result.maximum_ = maximum_;
    + if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
    + to_bitField0_ |= 0x00000004;
    + }
    + result.sum_ = sum_;
    + result.bitField0_ = to_bitField0_;
    + onBuilt();
    + return result;
    + }
    +
    + public Builder mergeFrom(com.google.protobuf.Message other) {
    + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics) {
    + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics)other);
    + } else {
    + super.mergeFrom(other);
    + return this;
    + }
    + }
    +
    + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics other) {
    + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDefaultInstance()) return this;
    + if (other.hasMinimum()) {
    + setMinimum(other.getMinimum());
    + }
    + if (other.hasMaximum()) {
    + setMaximum(other.getMaximum());
    + }
    + if (other.hasSum()) {
    + setSum(other.getSum());
    + }
    + this.mergeUnknownFields(other.getUnknownFields());
    + return this;
    + }
    +
    + public final boolean isInitialized() {
    + return true;
    + }
    +
    + public Builder mergeFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + com.google.protobuf.UnknownFieldSet.Builder unknownFields =
    + com.google.protobuf.UnknownFieldSet.newBuilder(
    + this.getUnknownFields());
    + while (true) {
    + int tag = input.readTag();
    + switch (tag) {
    + case 0:
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + default: {
    + if (!parseUnknownField(input, unknownFields,
    + extensionRegistry, tag)) {
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + }
    + break;
    + }
    + case 9: {
    + bitField0_ |= 0x00000001;
    + minimum_ = input.readDouble();
    + break;
    + }
    + case 17: {
    + bitField0_ |= 0x00000002;
    + maximum_ = input.readDouble();
    + break;
    + }
    + case 25: {
    + bitField0_ |= 0x00000004;
    + sum_ = input.readDouble();
    + break;
    + }
    + }
    + }
    + }
    +
    + private int bitField0_;
    +
    + // optional double minimum = 1;
    + private double minimum_ ;
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public double getMinimum() {
    + return minimum_;
    + }
    + public Builder setMinimum(double value) {
    + bitField0_ |= 0x00000001;
    + minimum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMinimum() {
    + bitField0_ = (bitField0_ & ~0x00000001);
    + minimum_ = 0D;
    + onChanged();
    + return this;
    + }
    +
    + // optional double maximum = 2;
    + private double maximum_ ;
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public double getMaximum() {
    + return maximum_;
    + }
    + public Builder setMaximum(double value) {
    + bitField0_ |= 0x00000002;
    + maximum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMaximum() {
    + bitField0_ = (bitField0_ & ~0x00000002);
    + maximum_ = 0D;
    + onChanged();
    + return this;
    + }
    +
    + // optional double sum = 3;
    + private double sum_ ;
    + public boolean hasSum() {
    + return ((bitField0_ & 0x00000004) == 0x00000004);
    + }
    + public double getSum() {
    + return sum_;
    + }
    + public Builder setSum(double value) {
    + bitField0_ |= 0x00000004;
    + sum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearSum() {
    + bitField0_ = (bitField0_ & ~0x00000004);
    + sum_ = 0D;
    + onChanged();
    + return this;
    + }
    +
    + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.DoubleStatistics)
    + }
    +
    + static {
    + defaultInstance = new DoubleStatistics(true);
    + defaultInstance.initFields();
    + }
    +
    + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.DoubleStatistics)
    + }
    +
    + public interface StringStatisticsOrBuilder
    + extends com.google.protobuf.MessageOrBuilder {
    +
    + // optional string minimum = 1;
    + boolean hasMinimum();
    + String getMinimum();
    +
    + // optional string maximum = 2;
    + boolean hasMaximum();
    + String getMaximum();
    + }
    + public static final class StringStatistics extends
    + com.google.protobuf.GeneratedMessage
    + implements StringStatisticsOrBuilder {
    + // Use StringStatistics.newBuilder() to construct.
    + private StringStatistics(Builder builder) {
    + super(builder);
    + }
    + private StringStatistics(boolean noInit) {}
    +
    + private static final StringStatistics defaultInstance;
    + public static StringStatistics getDefaultInstance() {
    + return defaultInstance;
    + }
    +
    + public StringStatistics getDefaultInstanceForType() {
    + return defaultInstance;
    + }
    +
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_fieldAccessorTable;
    + }
    +
    + private int bitField0_;
    + // optional string minimum = 1;
    + public static final int MINIMUM_FIELD_NUMBER = 1;
    + private java.lang.Object minimum_;
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public String getMinimum() {
    + java.lang.Object ref = minimum_;
    + if (ref instanceof String) {
    + return (String) ref;
    + } else {
    + com.google.protobuf.ByteString bs =
    + (com.google.protobuf.ByteString) ref;
    + String s = bs.toStringUtf8();
    + if (com.google.protobuf.Internal.isValidUtf8(bs)) {
    + minimum_ = s;
    + }
    + return s;
    + }
    + }
    + private com.google.protobuf.ByteString getMinimumBytes() {
    + java.lang.Object ref = minimum_;
    + if (ref instanceof String) {
    + com.google.protobuf.ByteString b =
    + com.google.protobuf.ByteString.copyFromUtf8((String) ref);
    + minimum_ = b;
    + return b;
    + } else {
    + return (com.google.protobuf.ByteString) ref;
    + }
    + }
    +
    + // optional string maximum = 2;
    + public static final int MAXIMUM_FIELD_NUMBER = 2;
    + private java.lang.Object maximum_;
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public String getMaximum() {
    + java.lang.Object ref = maximum_;
    + if (ref instanceof String) {
    + return (String) ref;
    + } else {
    + com.google.protobuf.ByteString bs =
    + (com.google.protobuf.ByteString) ref;
    + String s = bs.toStringUtf8();
    + if (com.google.protobuf.Internal.isValidUtf8(bs)) {
    + maximum_ = s;
    + }
    + return s;
    + }
    + }
    + private com.google.protobuf.ByteString getMaximumBytes() {
    + java.lang.Object ref = maximum_;
    + if (ref instanceof String) {
    + com.google.protobuf.ByteString b =
    + com.google.protobuf.ByteString.copyFromUtf8((String) ref);
    + maximum_ = b;
    + return b;
    + } else {
    + return (com.google.protobuf.ByteString) ref;
    + }
    + }
    +
    + private void initFields() {
    + minimum_ = "";
    + maximum_ = "";
    + }
    + private byte memoizedIsInitialized = -1;
    + public final boolean isInitialized() {
    + byte isInitialized = memoizedIsInitialized;
    + if (isInitialized != -1) return isInitialized == 1;
    +
    + memoizedIsInitialized = 1;
    + return true;
    + }
    +
    + public void writeTo(com.google.protobuf.CodedOutputStream output)
    + throws java.io.IOException {
    + getSerializedSize();
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + output.writeBytes(1, getMinimumBytes());
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + output.writeBytes(2, getMaximumBytes());
    + }
    + getUnknownFields().writeTo(output);
    + }
    +
    + private int memoizedSerializedSize = -1;
    + public int getSerializedSize() {
    + int size = memoizedSerializedSize;
    + if (size != -1) return size;
    +
    + size = 0;
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeBytesSize(1, getMinimumBytes());
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeBytesSize(2, getMaximumBytes());
    + }
    + size += getUnknownFields().getSerializedSize();
    + memoizedSerializedSize = size;
    + return size;
    + }
    +
    + private static final long serialVersionUID = 0L;
    + @java.lang.Override
    + protected java.lang.Object writeReplace()
    + throws java.io.ObjectStreamException {
    + return super.writeReplace();
    + }
    +
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + com.google.protobuf.ByteString data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + com.google.protobuf.ByteString data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(byte[] data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + byte[] data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseDelimitedFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseDelimitedFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    +
    + public static Builder newBuilder() { return Builder.create(); }
    + public Builder newBuilderForType() { return newBuilder(); }
    + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics prototype) {
    + return newBuilder().mergeFrom(prototype);
    + }
    + public Builder toBuilder() { return newBuilder(this); }
    +
    + @java.lang.Override
    + protected Builder newBuilderForType(
    + com.google.protobuf.GeneratedMessage.BuilderParent parent) {
    + Builder builder = new Builder(parent);
    + return builder;
    + }
    + public static final class Builder extends
    + com.google.protobuf.GeneratedMessage.Builder<Builder>
    + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatisticsOrBuilder {
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_StringStatistics_fieldAccessorTable;
    + }
    +
    + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.newBuilder()
    + private Builder() {
    + maybeForceBuilderInitialization();
    + }
    +
    + private Builder(BuilderParent parent) {
    + super(parent);
    + maybeForceBuilderInitialization();
    + }
    + private void maybeForceBuilderInitialization() {
    + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
    + }
    + }
    + private static Builder create() {
    + return new Builder();
    + }
    +
    + public Builder clear() {
    + super.clear();
    + minimum_ = "";
    + bitField0_ = (bitField0_ & ~0x00000001);
    + maximum_ = "";
    + bitField0_ = (bitField0_ & ~0x00000002);
    + return this;
    + }
    +
    + public Builder clone() {
    + return create().mergeFrom(buildPartial());
    + }
    +
    + public com.google.protobuf.Descriptors.Descriptor
    + getDescriptorForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDescriptor();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics getDefaultInstanceForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDefaultInstance();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics build() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(result);
    + }
    + return result;
    + }
    +
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics buildParsed()
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(
    + result).asInvalidProtocolBufferException();
    + }
    + return result;
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics buildPartial() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics(this);
    + int from_bitField0_ = bitField0_;
    + int to_bitField0_ = 0;
    + if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
    + to_bitField0_ |= 0x00000001;
    + }
    + result.minimum_ = minimum_;
    + if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
    + to_bitField0_ |= 0x00000002;
    + }
    + result.maximum_ = maximum_;
    + result.bitField0_ = to_bitField0_;
    + onBuilt();
    + return result;
    + }
    +
    + public Builder mergeFrom(com.google.protobuf.Message other) {
    + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics) {
    + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics)other);
    + } else {
    + super.mergeFrom(other);
    + return this;
    + }
    + }
    +
    + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics other) {
    + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDefaultInstance()) return this;
    + if (other.hasMinimum()) {
    + setMinimum(other.getMinimum());
    + }
    + if (other.hasMaximum()) {
    + setMaximum(other.getMaximum());
    + }
    + this.mergeUnknownFields(other.getUnknownFields());
    + return this;
    + }
    +
    + public final boolean isInitialized() {
    + return true;
    + }
    +
    + public Builder mergeFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + com.google.protobuf.UnknownFieldSet.Builder unknownFields =
    + com.google.protobuf.UnknownFieldSet.newBuilder(
    + this.getUnknownFields());
    + while (true) {
    + int tag = input.readTag();
    + switch (tag) {
    + case 0:
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + default: {
    + if (!parseUnknownField(input, unknownFields,
    + extensionRegistry, tag)) {
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + }
    + break;
    + }
    + case 10: {
    + bitField0_ |= 0x00000001;
    + minimum_ = input.readBytes();
    + break;
    + }
    + case 18: {
    + bitField0_ |= 0x00000002;
    + maximum_ = input.readBytes();
    + break;
    + }
    + }
    + }
    + }
    +
    + private int bitField0_;
    +
    + // optional string minimum = 1;
    + private java.lang.Object minimum_ = "";
    + public boolean hasMinimum() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public String getMinimum() {
    + java.lang.Object ref = minimum_;
    + if (!(ref instanceof String)) {
    + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
    + minimum_ = s;
    + return s;
    + } else {
    + return (String) ref;
    + }
    + }
    + public Builder setMinimum(String value) {
    + if (value == null) {
    + throw new NullPointerException();
    + }
    + bitField0_ |= 0x00000001;
    + minimum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMinimum() {
    + bitField0_ = (bitField0_ & ~0x00000001);
    + minimum_ = getDefaultInstance().getMinimum();
    + onChanged();
    + return this;
    + }
    + void setMinimum(com.google.protobuf.ByteString value) {
    + bitField0_ |= 0x00000001;
    + minimum_ = value;
    + onChanged();
    + }
    +
    + // optional string maximum = 2;
    + private java.lang.Object maximum_ = "";
    + public boolean hasMaximum() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public String getMaximum() {
    + java.lang.Object ref = maximum_;
    + if (!(ref instanceof String)) {
    + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
    + maximum_ = s;
    + return s;
    + } else {
    + return (String) ref;
    + }
    + }
    + public Builder setMaximum(String value) {
    + if (value == null) {
    + throw new NullPointerException();
    + }
    + bitField0_ |= 0x00000002;
    + maximum_ = value;
    + onChanged();
    + return this;
    + }
    + public Builder clearMaximum() {
    + bitField0_ = (bitField0_ & ~0x00000002);
    + maximum_ = getDefaultInstance().getMaximum();
    + onChanged();
    + return this;
    + }
    + void setMaximum(com.google.protobuf.ByteString value) {
    + bitField0_ |= 0x00000002;
    + maximum_ = value;
    + onChanged();
    + }
    +
    + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.StringStatistics)
    + }
    +
    + static {
    + defaultInstance = new StringStatistics(true);
    + defaultInstance.initFields();
    + }
    +
    + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.StringStatistics)
    + }
    +
    + public interface BucketStatisticsOrBuilder
    + extends com.google.protobuf.MessageOrBuilder {
    +
    + // repeated uint64 count = 1 [packed = true];
    + java.util.List<java.lang.Long> getCountList();
    + int getCountCount();
    + long getCount(int index);
    + }
    + public static final class BucketStatistics extends
    + com.google.protobuf.GeneratedMessage
    + implements BucketStatisticsOrBuilder {
    + // Use BucketStatistics.newBuilder() to construct.
    + private BucketStatistics(Builder builder) {
    + super(builder);
    + }
    + private BucketStatistics(boolean noInit) {}
    +
    + private static final BucketStatistics defaultInstance;
    + public static BucketStatistics getDefaultInstance() {
    + return defaultInstance;
    + }
    +
    + public BucketStatistics getDefaultInstanceForType() {
    + return defaultInstance;
    + }
    +
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_fieldAccessorTable;
    + }
    +
    + // repeated uint64 count = 1 [packed = true];
    + public static final int COUNT_FIELD_NUMBER = 1;
    + private java.util.List<java.lang.Long> count_;
    + public java.util.List<java.lang.Long>
    + getCountList() {
    + return count_;
    + }
    + public int getCountCount() {
    + return count_.size();
    + }
    + public long getCount(int index) {
    + return count_.get(index);
    + }
    + private int countMemoizedSerializedSize = -1;
    +
    + private void initFields() {
    + count_ = java.util.Collections.emptyList();
    + }
    + private byte memoizedIsInitialized = -1;
    + public final boolean isInitialized() {
    + byte isInitialized = memoizedIsInitialized;
    + if (isInitialized != -1) return isInitialized == 1;
    +
    + memoizedIsInitialized = 1;
    + return true;
    + }
    +
    + public void writeTo(com.google.protobuf.CodedOutputStream output)
    + throws java.io.IOException {
    + getSerializedSize();
    + if (getCountList().size() > 0) {
    + output.writeRawVarint32(10);
    + output.writeRawVarint32(countMemoizedSerializedSize);
    + }
    + for (int i = 0; i < count_.size(); i++) {
    + output.writeUInt64NoTag(count_.get(i));
    + }
    + getUnknownFields().writeTo(output);
    + }
    +
    + private int memoizedSerializedSize = -1;
    + public int getSerializedSize() {
    + int size = memoizedSerializedSize;
    + if (size != -1) return size;
    +
    + size = 0;
    + {
    + int dataSize = 0;
    + for (int i = 0; i < count_.size(); i++) {
    + dataSize += com.google.protobuf.CodedOutputStream
    + .computeUInt64SizeNoTag(count_.get(i));
    + }
    + size += dataSize;
    + if (!getCountList().isEmpty()) {
    + size += 1;
    + size += com.google.protobuf.CodedOutputStream
    + .computeInt32SizeNoTag(dataSize);
    + }
    + countMemoizedSerializedSize = dataSize;
    + }
    + size += getUnknownFields().getSerializedSize();
    + memoizedSerializedSize = size;
    + return size;
    + }
    +
    + private static final long serialVersionUID = 0L;
    + @java.lang.Override
    + protected java.lang.Object writeReplace()
    + throws java.io.ObjectStreamException {
    + return super.writeReplace();
    + }
    +
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + com.google.protobuf.ByteString data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + com.google.protobuf.ByteString data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(byte[] data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + byte[] data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseDelimitedFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseDelimitedFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    +
    + public static Builder newBuilder() { return Builder.create(); }
    + public Builder newBuilderForType() { return newBuilder(); }
    + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics prototype) {
    + return newBuilder().mergeFrom(prototype);
    + }
    + public Builder toBuilder() { return newBuilder(this); }
    +
    + @java.lang.Override
    + protected Builder newBuilderForType(
    + com.google.protobuf.GeneratedMessage.BuilderParent parent) {
    + Builder builder = new Builder(parent);
    + return builder;
    + }
    + public static final class Builder extends
    + com.google.protobuf.GeneratedMessage.Builder<Builder>
    + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatisticsOrBuilder {
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_BucketStatistics_fieldAccessorTable;
    + }
    +
    + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.newBuilder()
    + private Builder() {
    + maybeForceBuilderInitialization();
    + }
    +
    + private Builder(BuilderParent parent) {
    + super(parent);
    + maybeForceBuilderInitialization();
    + }
    + private void maybeForceBuilderInitialization() {
    + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
    + }
    + }
    + private static Builder create() {
    + return new Builder();
    + }
    +
    + public Builder clear() {
    + super.clear();
    + count_ = java.util.Collections.emptyList();;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + return this;
    + }
    +
    + public Builder clone() {
    + return create().mergeFrom(buildPartial());
    + }
    +
    + public com.google.protobuf.Descriptors.Descriptor
    + getDescriptorForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDescriptor();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics getDefaultInstanceForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics build() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(result);
    + }
    + return result;
    + }
    +
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics buildParsed()
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(
    + result).asInvalidProtocolBufferException();
    + }
    + return result;
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics buildPartial() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics(this);
    + int from_bitField0_ = bitField0_;
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + count_ = java.util.Collections.unmodifiableList(count_);
    + bitField0_ = (bitField0_ & ~0x00000001);
    + }
    + result.count_ = count_;
    + onBuilt();
    + return result;
    + }
    +
    + public Builder mergeFrom(com.google.protobuf.Message other) {
    + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics) {
    + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics)other);
    + } else {
    + super.mergeFrom(other);
    + return this;
    + }
    + }
    +
    + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics other) {
    + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance()) return this;
    + if (!other.count_.isEmpty()) {
    + if (count_.isEmpty()) {
    + count_ = other.count_;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + } else {
    + ensureCountIsMutable();
    + count_.addAll(other.count_);
    + }
    + onChanged();
    + }
    + this.mergeUnknownFields(other.getUnknownFields());
    + return this;
    + }
    +
    + public final boolean isInitialized() {
    + return true;
    + }
    +
    + public Builder mergeFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + com.google.protobuf.UnknownFieldSet.Builder unknownFields =
    + com.google.protobuf.UnknownFieldSet.newBuilder(
    + this.getUnknownFields());
    + while (true) {
    + int tag = input.readTag();
    + switch (tag) {
    + case 0:
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + default: {
    + if (!parseUnknownField(input, unknownFields,
    + extensionRegistry, tag)) {
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + }
    + break;
    + }
    + case 8: {
    + ensureCountIsMutable();
    + count_.add(input.readUInt64());
    + break;
    + }
    + case 10: {
    + int length = input.readRawVarint32();
    + int limit = input.pushLimit(length);
    + while (input.getBytesUntilLimit() > 0) {
    + addCount(input.readUInt64());
    + }
    + input.popLimit(limit);
    + break;
    + }
    + }
    + }
    + }
    +
    + private int bitField0_;
    +
    + // repeated uint64 count = 1 [packed = true];
    + private java.util.List<java.lang.Long> count_ = java.util.Collections.emptyList();;
    + private void ensureCountIsMutable() {
    + if (!((bitField0_ & 0x00000001) == 0x00000001)) {
    + count_ = new java.util.ArrayList<java.lang.Long>(count_);
    + bitField0_ |= 0x00000001;
    + }
    + }
    + public java.util.List<java.lang.Long>
    + getCountList() {
    + return java.util.Collections.unmodifiableList(count_);
    + }
    + public int getCountCount() {
    + return count_.size();
    + }
    + public long getCount(int index) {
    + return count_.get(index);
    + }
    + public Builder setCount(
    + int index, long value) {
    + ensureCountIsMutable();
    + count_.set(index, value);
    + onChanged();
    + return this;
    + }
    + public Builder addCount(long value) {
    + ensureCountIsMutable();
    + count_.add(value);
    + onChanged();
    + return this;
    + }
    + public Builder addAllCount(
    + java.lang.Iterable<? extends java.lang.Long> values) {
    + ensureCountIsMutable();
    + super.addAll(values, count_);
    + onChanged();
    + return this;
    + }
    + public Builder clearCount() {
    + count_ = java.util.Collections.emptyList();;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + onChanged();
    + return this;
    + }
    +
    + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.BucketStatistics)
    + }
    +
    + static {
    + defaultInstance = new BucketStatistics(true);
    + defaultInstance.initFields();
    + }
    +
    + // @@protoc_insertion_point(class_scope:org.apache.hadoop.hive.ql.io.orc.BucketStatistics)
    + }
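    (Editor's note: the BucketStatistics message above carries a single packed
    repeated uint64 "count" field. For orientation, here is a minimal usage
    sketch, not part of this commit, showing how the generated API round-trips
    that field; the class name BucketStatisticsExample is invented for
    illustration.)

        import com.google.protobuf.ByteString;
        import org.apache.hadoop.hive.ql.io.orc.OrcProto;

        public class BucketStatisticsExample {
          public static void main(String[] args) throws Exception {
            // addCount appends to the repeated uint64 count field ([packed = true]).
            OrcProto.BucketStatistics stats = OrcProto.BucketStatistics.newBuilder()
                .addCount(3L)
                .addCount(7L)
                .build();
            // Serialize, then parse back through the generated parseFrom overload.
            ByteString bytes = stats.toByteString();
            OrcProto.BucketStatistics copy = OrcProto.BucketStatistics.parseFrom(bytes);
            System.out.println(copy.getCountList());   // prints [3, 7]
          }
        }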
    +
    + public interface ColumnStatisticsOrBuilder
    + extends com.google.protobuf.MessageOrBuilder {
    +
    + // optional uint64 numberOfValues = 1;
    + boolean hasNumberOfValues();
    + long getNumberOfValues();
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.IntegerStatistics intStatistics = 2;
    + boolean hasIntStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics getIntStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatisticsOrBuilder getIntStatisticsOrBuilder();
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.DoubleStatistics doubleStatistics = 3;
    + boolean hasDoubleStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics getDoubleStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatisticsOrBuilder getDoubleStatisticsOrBuilder();
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.StringStatistics stringStatistics = 4;
    + boolean hasStringStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics getStringStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatisticsOrBuilder getStringStatisticsOrBuilder();
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.BucketStatistics bucketStatistics = 5;
    + boolean hasBucketStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics getBucketStatistics();
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatisticsOrBuilder getBucketStatisticsOrBuilder();
    + }
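    (Editor's note: the ColumnStatisticsOrBuilder interface above exposes one
    optional sub-message per statistics type. A minimal reader-side sketch, not
    part of this commit; the helper name describeColumn is invented for
    illustration.)

        // Print whichever per-type statistics a parsed ColumnStatistics carries,
        // using only the has*/get* accessors declared in the interface above.
        static void describeColumn(OrcProto.ColumnStatistics stats) {
          System.out.println("values: " + stats.getNumberOfValues());
          if (stats.hasIntStatistics()) {
            System.out.println("int: " + stats.getIntStatistics());
          } else if (stats.hasDoubleStatistics()) {
            System.out.println("double: " + stats.getDoubleStatistics());
          } else if (stats.hasStringStatistics()) {
            System.out.println("string: " + stats.getStringStatistics());
          } else if (stats.hasBucketStatistics()) {
            System.out.println("bucket counts: " +
                stats.getBucketStatistics().getCountList());
          }
        }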
    + public static final class ColumnStatistics extends
    + com.google.protobuf.GeneratedMessage
    + implements ColumnStatisticsOrBuilder {
    + // Use ColumnStatistics.newBuilder() to construct.
    + private ColumnStatistics(Builder builder) {
    + super(builder);
    + }
    + private ColumnStatistics(boolean noInit) {}
    +
    + private static final ColumnStatistics defaultInstance;
    + public static ColumnStatistics getDefaultInstance() {
    + return defaultInstance;
    + }
    +
    + public ColumnStatistics getDefaultInstanceForType() {
    + return defaultInstance;
    + }
    +
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable;
    + }
    +
    + private int bitField0_;
    + // optional uint64 numberOfValues = 1;
    + public static final int NUMBEROFVALUES_FIELD_NUMBER = 1;
    + private long numberOfValues_;
    + public boolean hasNumberOfValues() {
    + return ((bitField0_ & 0x00000001) == 0x00000001);
    + }
    + public long getNumberOfValues() {
    + return numberOfValues_;
    + }
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.IntegerStatistics intStatistics = 2;
    + public static final int INTSTATISTICS_FIELD_NUMBER = 2;
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics intStatistics_;
    + public boolean hasIntStatistics() {
    + return ((bitField0_ & 0x00000002) == 0x00000002);
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics getIntStatistics() {
    + return intStatistics_;
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatisticsOrBuilder getIntStatisticsOrBuilder() {
    + return intStatistics_;
    + }
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.DoubleStatistics doubleStatistics = 3;
    + public static final int DOUBLESTATISTICS_FIELD_NUMBER = 3;
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics doubleStatistics_;
    + public boolean hasDoubleStatistics() {
    + return ((bitField0_ & 0x00000004) == 0x00000004);
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics getDoubleStatistics() {
    + return doubleStatistics_;
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatisticsOrBuilder getDoubleStatisticsOrBuilder() {
    + return doubleStatistics_;
    + }
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.StringStatistics stringStatistics = 4;
    + public static final int STRINGSTATISTICS_FIELD_NUMBER = 4;
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics stringStatistics_;
    + public boolean hasStringStatistics() {
    + return ((bitField0_ & 0x00000008) == 0x00000008);
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics getStringStatistics() {
    + return stringStatistics_;
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatisticsOrBuilder getStringStatisticsOrBuilder() {
    + return stringStatistics_;
    + }
    +
    + // optional .org.apache.hadoop.hive.ql.io.orc.BucketStatistics bucketStatistics = 5;
    + public static final int BUCKETSTATISTICS_FIELD_NUMBER = 5;
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics bucketStatistics_;
    + public boolean hasBucketStatistics() {
    + return ((bitField0_ & 0x00000010) == 0x00000010);
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics getBucketStatistics() {
    + return bucketStatistics_;
    + }
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatisticsOrBuilder getBucketStatisticsOrBuilder() {
    + return bucketStatistics_;
    + }
    +
    + private void initFields() {
    + numberOfValues_ = 0L;
    + intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
    + doubleStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDefaultInstance();
    + stringStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDefaultInstance();
    + bucketStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance();
    + }
    + private byte memoizedIsInitialized = -1;
    + public final boolean isInitialized() {
    + byte isInitialized = memoizedIsInitialized;
    + if (isInitialized != -1) return isInitialized == 1;
    +
    + memoizedIsInitialized = 1;
    + return true;
    + }
    +
    + public void writeTo(com.google.protobuf.CodedOutputStream output)
    + throws java.io.IOException {
    + getSerializedSize();
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + output.writeUInt64(1, numberOfValues_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + output.writeMessage(2, intStatistics_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + output.writeMessage(3, doubleStatistics_);
    + }
    + if (((bitField0_ & 0x00000008) == 0x00000008)) {
    + output.writeMessage(4, stringStatistics_);
    + }
    + if (((bitField0_ & 0x00000010) == 0x00000010)) {
    + output.writeMessage(5, bucketStatistics_);
    + }
    + getUnknownFields().writeTo(output);
    + }
    +
    + private int memoizedSerializedSize = -1;
    + public int getSerializedSize() {
    + int size = memoizedSerializedSize;
    + if (size != -1) return size;
    +
    + size = 0;
    + if (((bitField0_ & 0x00000001) == 0x00000001)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeUInt64Size(1, numberOfValues_);
    + }
    + if (((bitField0_ & 0x00000002) == 0x00000002)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeMessageSize(2, intStatistics_);
    + }
    + if (((bitField0_ & 0x00000004) == 0x00000004)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeMessageSize(3, doubleStatistics_);
    + }
    + if (((bitField0_ & 0x00000008) == 0x00000008)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeMessageSize(4, stringStatistics_);
    + }
    + if (((bitField0_ & 0x00000010) == 0x00000010)) {
    + size += com.google.protobuf.CodedOutputStream
    + .computeMessageSize(5, bucketStatistics_);
    + }
    + size += getUnknownFields().getSerializedSize();
    + memoizedSerializedSize = size;
    + return size;
    + }
    +
    + private static final long serialVersionUID = 0L;
    + @java.lang.Override
    + protected java.lang.Object writeReplace()
    + throws java.io.ObjectStreamException {
    + return super.writeReplace();
    + }
    +
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + com.google.protobuf.ByteString data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + com.google.protobuf.ByteString data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(byte[] data)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + byte[] data,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + return newBuilder().mergeFrom(data, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseDelimitedFrom(java.io.InputStream input)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseDelimitedFrom(
    + java.io.InputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + Builder builder = newBuilder();
    + if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
    + return builder.buildParsed();
    + } else {
    + return null;
    + }
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input).buildParsed();
    + }
    + public static org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics parseFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + return newBuilder().mergeFrom(input, extensionRegistry)
    + .buildParsed();
    + }
    +
    + public static Builder newBuilder() { return Builder.create(); }
    + public Builder newBuilderForType() { return newBuilder(); }
    + public static Builder newBuilder(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics prototype) {
    + return newBuilder().mergeFrom(prototype);
    + }
    + public Builder toBuilder() { return newBuilder(this); }
    +
    + @java.lang.Override
    + protected Builder newBuilderForType(
    + com.google.protobuf.GeneratedMessage.BuilderParent parent) {
    + Builder builder = new Builder(parent);
    + return builder;
    + }
    + public static final class Builder extends
    + com.google.protobuf.GeneratedMessage.Builder<Builder>
    + implements org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatisticsOrBuilder {
    + public static final com.google.protobuf.Descriptors.Descriptor
    + getDescriptor() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_descriptor;
    + }
    +
    + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable
    + internalGetFieldAccessorTable() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.internal_static_org_apache_hadoop_hive_ql_io_orc_ColumnStatistics_fieldAccessorTable;
    + }
    +
    + // Construct using org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.newBuilder()
    + private Builder() {
    + maybeForceBuilderInitialization();
    + }
    +
    + private Builder(BuilderParent parent) {
    + super(parent);
    + maybeForceBuilderInitialization();
    + }
    + private void maybeForceBuilderInitialization() {
    + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) {
    + getIntStatisticsFieldBuilder();
    + getDoubleStatisticsFieldBuilder();
    + getStringStatisticsFieldBuilder();
    + getBucketStatisticsFieldBuilder();
    + }
    + }
    + private static Builder create() {
    + return new Builder();
    + }
    +
    + public Builder clear() {
    + super.clear();
    + numberOfValues_ = 0L;
    + bitField0_ = (bitField0_ & ~0x00000001);
    + if (intStatisticsBuilder_ == null) {
    + intStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.getDefaultInstance();
    + } else {
    + intStatisticsBuilder_.clear();
    + }
    + bitField0_ = (bitField0_ & ~0x00000002);
    + if (doubleStatisticsBuilder_ == null) {
    + doubleStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.getDefaultInstance();
    + } else {
    + doubleStatisticsBuilder_.clear();
    + }
    + bitField0_ = (bitField0_ & ~0x00000004);
    + if (stringStatisticsBuilder_ == null) {
    + stringStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.getDefaultInstance();
    + } else {
    + stringStatisticsBuilder_.clear();
    + }
    + bitField0_ = (bitField0_ & ~0x00000008);
    + if (bucketStatisticsBuilder_ == null) {
    + bucketStatistics_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.BucketStatistics.getDefaultInstance();
    + } else {
    + bucketStatisticsBuilder_.clear();
    + }
    + bitField0_ = (bitField0_ & ~0x00000010);
    + return this;
    + }
    +
    + public Builder clone() {
    + return create().mergeFrom(buildPartial());
    + }
    +
    + public com.google.protobuf.Descriptors.Descriptor
    + getDescriptorForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.getDescriptor();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics getDefaultInstanceForType() {
    + return org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.getDefaultInstance();
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics build() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(result);
    + }
    + return result;
    + }
    +
    + private org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics buildParsed()
    + throws com.google.protobuf.InvalidProtocolBufferException {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics result = buildPartial();
    + if (!result.isInitialized()) {
    + throw newUninitializedMessageException(
    + result).asInvalidProtocolBufferException();
    + }
    + return result;
    + }
    +
    + public org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics buildPartial() {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics result = new org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics(this);
    + int from_bitField0_ = bitField0_;
    + int to_bitField0_ = 0;
    + if (((from_bitField0_ & 0x00000001) == 0x00000001)) {
    + to_bitField0_ |= 0x00000001;
    + }
    + result.numberOfValues_ = numberOfValues_;
    + if (((from_bitField0_ & 0x00000002) == 0x00000002)) {
    + to_bitField0_ |= 0x00000002;
    + }
    + if (intStatisticsBuilder_ == null) {
    + result.intStatistics_ = intStatistics_;
    + } else {
    + result.intStatistics_ = intStatisticsBuilder_.build();
    + }
    + if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
    + to_bitField0_ |= 0x00000004;
    + }
    + if (doubleStatisticsBuilder_ == null) {
    + result.doubleStatistics_ = doubleStatistics_;
    + } else {
    + result.doubleStatistics_ = doubleStatisticsBuilder_.build();
    + }
    + if (((from_bitField0_ & 0x00000008) == 0x00000008)) {
    + to_bitField0_ |= 0x00000008;
    + }
    + if (stringStatisticsBuilder_ == null) {
    + result.stringStatistics_ = stringStatistics_;
    + } else {
    + result.stringStatistics_ = stringStatisticsBuilder_.build();
    + }
    + if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
    + to_bitField0_ |= 0x00000010;
    + }
    + if (bucketStatisticsBuilder_ == null) {
    + result.bucketStatistics_ = bucketStatistics_;
    + } else {
    + result.bucketStatistics_ = bucketStatisticsBuilder_.build();
    + }
    + result.bitField0_ = to_bitField0_;
    + onBuilt();
    + return result;
    + }
    +
    + public Builder mergeFrom(com.google.protobuf.Message other) {
    + if (other instanceof org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics) {
    + return mergeFrom((org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics)other);
    + } else {
    + super.mergeFrom(other);
    + return this;
    + }
    + }
    +
    + public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics other) {
    + if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.ColumnStatistics.getDefaultInstance()) return this;
    + if (other.hasNumberOfValues()) {
    + setNumberOfValues(other.getNumberOfValues());
    + }
    + if (other.hasIntStatistics()) {
    + mergeIntStatistics(other.getIntStatistics());
    + }
    + if (other.hasDoubleStatistics()) {
    + mergeDoubleStatistics(other.getDoubleStatistics());
    + }
    + if (other.hasStringStatistics()) {
    + mergeStringStatistics(other.getStringStatistics());
    + }
    + if (other.hasBucketStatistics()) {
    + mergeBucketStatistics(other.getBucketStatistics());
    + }
    + this.mergeUnknownFields(other.getUnknownFields());
    + return this;
    + }
    +
    + public final boolean isInitialized() {
    + return true;
    + }
    +
    + public Builder mergeFrom(
    + com.google.protobuf.CodedInputStream input,
    + com.google.protobuf.ExtensionRegistryLite extensionRegistry)
    + throws java.io.IOException {
    + com.google.protobuf.UnknownFieldSet.Builder unknownFields =
    + com.google.protobuf.UnknownFieldSet.newBuilder(
    + this.getUnknownFields());
    + while (true) {
    + int tag = input.readTag();
    + switch (tag) {
    + case 0:
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + default: {
    + if (!parseUnknownField(input, unknownFields,
    + extensionRegistry, tag)) {
    + this.setUnknownFields(unknownFields.build());
    + onChanged();
    + return this;
    + }
    + break;
    + }
    + case 8: {
    + bitField0_ |= 0x00000001;
    + numberOfValues_ = input.readUInt64();
    + break;
    + }
    + case 18: {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.Builder subBuilder = org.apache.hadoop.hive.ql.io.orc.OrcProto.IntegerStatistics.newBuilder();
    + if (hasIntStatistics()) {
    + subBuilder.mergeFrom(getIntStatistics());
    + }
    + input.readMessage(subBuilder, extensionRegistry);
    + setIntStatistics(subBuilder.buildPartial());
    + break;
    + }
    + case 26: {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.Builder subBuilder = org.apache.hadoop.hive.ql.io.orc.OrcProto.DoubleStatistics.newBuilder();
    + if (hasDoubleStatistics()) {
    + subBuilder.mergeFrom(getDoubleStatistics());
    + }
    + input.readMessage(subBuilder, extensionRegistry);
    + setDoubleStatistics(subBuilder.buildPartial());
    + break;
    + }
    + case 34: {
    + org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.Builder subBuilder = org.apache.hadoop.hive.ql.io.orc.OrcProto.StringStatistics.newBuilder();

    [... 7462 lines stripped ...]
  • Kevinwilfong at Mar 5, 2013 at 8:45 pm
    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,360 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.fs.FileStatus;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.conf.HiveConf;
    +import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
    +import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
    +import org.apache.hadoop.hive.ql.io.InputFormatChecker;
    +import org.apache.hadoop.hive.serde2.SerDe;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
    +import org.apache.hadoop.io.NullWritable;
    +import org.apache.hadoop.io.Writable;
    +import org.apache.hadoop.mapred.FileInputFormat;
    +import org.apache.hadoop.mapred.InputFormat;
    +import org.apache.hadoop.mapred.InputSplit;
    +import org.apache.hadoop.mapred.JobConf;
    +import org.apache.hadoop.mapred.OutputFormat;
    +import org.apache.hadoop.mapred.RecordWriter;
    +import org.apache.hadoop.mapred.Reporter;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +import java.io.DataInput;
    +import java.io.DataOutput;
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Properties;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public class TestInputOutputFormat {
    +
    + Path workDir = new Path(System.getProperty("test.tmp.dir","target/test/tmp"));
    +
    + public static class MyRow implements Writable {
    + int x;
    + int y;
    + MyRow(int x, int y) {
    + this.x = x;
    + this.y = y;
    + }
    +
    + @Override
    + public void write(DataOutput dataOutput) throws IOException {
    + throw new UnsupportedOperationException("no write");
    + }
    +
    + @Override
    + public void readFields(DataInput dataInput) throws IOException {
    + throw new UnsupportedOperationException("no read");
    + }
    + }
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    + JobConf conf;
    + FileSystem fs;
    + Path testFilePath;
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new JobConf();
    + fs = FileSystem.getLocal(conf);
    + testFilePath = new Path(workDir, "TestInputOutputFormat." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + @Test
    + public void testInOutFormat() throws Exception {
    + Properties properties = new Properties();
    + StructObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = (StructObjectInspector)
    + ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + SerDe serde = new OrcSerde();
    + HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    + FileSinkOperator.RecordWriter writer =
    + outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
    + properties, Reporter.NULL);
    + writer.write(serde.serialize(new MyRow(1,2), inspector));
    + writer.write(serde.serialize(new MyRow(2,2), inspector));
    + writer.write(serde.serialize(new MyRow(3,2), inspector));
    + writer.close(true);
    + serde = new OrcSerde();
    + properties.setProperty("columns", "x,y");
    + properties.setProperty("columns.types", "int:int");
    + serde.initialize(conf, properties);
    + assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    + inspector = (StructObjectInspector) serde.getObjectInspector();
    + assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    + InputFormat<?,?> in = new OrcInputFormat();
    + FileInputFormat.setInputPaths(conf, testFilePath.toString());
    + InputSplit[] splits = in.getSplits(conf, 1);
    + assertEquals(1, splits.length);
    +
    + // test the validate input method
    + ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
    + assertEquals(false,
    + ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    + fileList.add(fs.getFileStatus(testFilePath));
    + assertEquals(true,
    + ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    + fileList.add(fs.getFileStatus(workDir));
    + assertEquals(false,
    + ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    +
    +
    + // read the whole file
    + org.apache.hadoop.mapred.RecordReader reader =
    + in.getRecordReader(splits[0], conf, Reporter.NULL);
    + Object key = reader.createKey();
    + Writable value = (Writable) reader.createValue();
    + int rowNum = 0;
    + List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    + IntObjectInspector intInspector =
    + (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    + assertEquals(0.0, reader.getProgress(), 0.00001);
    + assertEquals(0, reader.getPos());
    + while (reader.next(key, value)) {
    + assertEquals(++rowNum, intInspector.get(inspector.
    + getStructFieldData(serde.deserialize(value), fields.get(0))));
    + assertEquals(2, intInspector.get(inspector.
    + getStructFieldData(serde.deserialize(value), fields.get(1))));
    + }
    + assertEquals(3, rowNum);
    + assertEquals(1.0, reader.getProgress(), 0.00001);
    + reader.close();
    +
    + // read just the first column
    + conf.set("hive.io.file.readcolumn.ids", "0");
    + reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    + key = reader.createKey();
    + value = (Writable) reader.createValue();
    + rowNum = 0;
    + fields = inspector.getAllStructFieldRefs();
    + while (reader.next(key, value)) {
    + assertEquals(++rowNum, intInspector.get(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    + }
    + assertEquals(3, rowNum);
    + reader.close();
    + }
    +
    + static class NestedRow implements Writable {
    + int z;
    + MyRow r;
    + NestedRow(int x, int y, int z) {
    + this.z = z;
    + this.r = new MyRow(x,y);
    + }
    +
    + @Override
    + public void write(DataOutput dataOutput) throws IOException {
    + throw new UnsupportedOperationException("unsupported");
    + }
    +
    + @Override
    + public void readFields(DataInput dataInput) throws IOException {
    + throw new UnsupportedOperationException("unsupported");
    + }
    + }
    +
    + @Test
    + public void testMROutput() throws Exception {
    + JobConf job = new JobConf(conf);
    + Properties properties = new Properties();
    + StructObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = (StructObjectInspector)
    + ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + SerDe serde = new OrcSerde();
    + OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    + RecordWriter writer =
    + outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
    + Reporter.NULL);
    + writer.write(NullWritable.get(),
    + serde.serialize(new NestedRow(1,2,3), inspector));
    + writer.write(NullWritable.get(),
    + serde.serialize(new NestedRow(4,5,6), inspector));
    + writer.write(NullWritable.get(),
    + serde.serialize(new NestedRow(7,8,9), inspector));
    + writer.close(Reporter.NULL);
    + serde = new OrcSerde();
    + properties.setProperty("columns", "z,r");
    + properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    + serde.initialize(conf, properties);
    + inspector = (StructObjectInspector) serde.getObjectInspector();
    + InputFormat<?,?> in = new OrcInputFormat();
    + FileInputFormat.setInputPaths(conf, testFilePath.toString());
    + InputSplit[] splits = in.getSplits(conf, 1);
    + assertEquals(1, splits.length);
    + conf.set("hive.io.file.readcolumn.ids", "1");
    + org.apache.hadoop.mapred.RecordReader reader =
    + in.getRecordReader(splits[0], conf, Reporter.NULL);
    + Object key = reader.createKey();
    + Object value = reader.createValue();
    + int rowNum = 0;
    + List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    + StructObjectInspector inner = (StructObjectInspector)
    + fields.get(1).getFieldObjectInspector();
    + List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    + IntObjectInspector intInspector =
    + (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    + while (reader.next(key, value)) {
    + assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
    + Object sub = inspector.getStructFieldData(value, fields.get(1));
    + assertEquals(3*rowNum+1, intInspector.get(inner.getStructFieldData(sub,
    + inFields.get(0))));
    + assertEquals(3*rowNum+2, intInspector.get(inner.getStructFieldData(sub,
    + inFields.get(1))));
    + rowNum += 1;
    + }
    + assertEquals(3, rowNum);
    + reader.close();
    +
    + }
    +
    + @Test
    + public void testEmptyFile() throws Exception {
    + JobConf job = new JobConf(conf);
    + Properties properties = new Properties();
    + HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    + FileSinkOperator.RecordWriter writer =
    + outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
    + properties, Reporter.NULL);
    + writer.close(true);
    + properties.setProperty("columns", "x,y");
    + properties.setProperty("columns.types", "int:int");
    + SerDe serde = new OrcSerde();
    + serde.initialize(conf, properties);
    + InputFormat<?,?> in = new OrcInputFormat();
    + FileInputFormat.setInputPaths(conf, testFilePath.toString());
    + InputSplit[] splits = in.getSplits(conf, 1);
    + assertEquals(1, splits.length);
    +
    + // read the whole file
    + conf.set("hive.io.file.readcolumn.ids", "0,1");
    + org.apache.hadoop.mapred.RecordReader reader =
    + in.getRecordReader(splits[0], conf, Reporter.NULL);
    + Object key = reader.createKey();
    + Object value = reader.createValue();
    + assertEquals(0.0, reader.getProgress(), 0.00001);
    + assertEquals(0, reader.getPos());
    + assertEquals(false, reader.next(key, value));
    + reader.close();
    + assertEquals(null, serde.getSerDeStats());
    + }
    +
    + static class StringRow implements Writable {
    + String str;
    + String str2;
    + StringRow(String s) {
    + str = s;
    + str2 = s;
    + }
    + @Override
    + public void write(DataOutput dataOutput) throws IOException {
    + throw new UnsupportedOperationException("no write");
    + }
    +
    + @Override
    + public void readFields(DataInput dataInput) throws IOException {
    + throw new UnsupportedOperationException("no read");
    + }
    + }
    +
    + @Test
    + public void testDefaultTypes() throws Exception {
    + JobConf job = new JobConf(conf);
    + Properties properties = new Properties();
    + StructObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = (StructObjectInspector)
    + ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + SerDe serde = new OrcSerde();
    + HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    + FileSinkOperator.RecordWriter writer =
    + outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
    + true, properties, Reporter.NULL);
    + writer.write(serde.serialize(new StringRow("owen"), inspector));
    + writer.write(serde.serialize(new StringRow("beth"), inspector));
    + writer.write(serde.serialize(new StringRow("laurel"), inspector));
    + writer.write(serde.serialize(new StringRow("hazen"), inspector));
    + writer.write(serde.serialize(new StringRow("colin"), inspector));
    + writer.write(serde.serialize(new StringRow("miles"), inspector));
    + writer.close(true);
    + serde = new OrcSerde();
    + properties.setProperty("columns", "str,str2");
    + serde.initialize(conf, properties);
    + inspector = (StructObjectInspector) serde.getObjectInspector();
    + assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    + InputFormat<?,?> in = new OrcInputFormat();
    + FileInputFormat.setInputPaths(conf, testFilePath.toString());
    + InputSplit[] splits = in.getSplits(conf, 1);
    + assertEquals(1, splits.length);
    +
    + // read the whole file
    + org.apache.hadoop.mapred.RecordReader reader =
    + in.getRecordReader(splits[0], conf, Reporter.NULL);
    + Object key = reader.createKey();
    + Writable value = (Writable) reader.createValue();
    + List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    + StringObjectInspector strInspector = (StringObjectInspector)
    + fields.get(0).getFieldObjectInspector();
    + assertEquals(true, reader.next(key, value));
    + assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(true, reader.next(key, value));
    + assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(true, reader.next(key, value));
    + assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(true, reader.next(key, value));
    + assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(true, reader.next(key, value));
    + assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(true, reader.next(key, value));
    + assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.
    + getStructFieldData(value, fields.get(0))));
    + assertEquals(false, reader.next(key, value));
    + reader.close();
    + }
    +}
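    (Editor's note: testInOutFormat above demonstrates column pruning by setting
    hive.io.file.readcolumn.ids before re-opening the reader. Below is a minimal
    stand-alone sketch condensed from that test, not part of this commit; the
    class name OrcColumnPruningSketch and the command-line argument are invented
    for illustration.)

        import org.apache.hadoop.fs.Path;
        import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
        import org.apache.hadoop.mapred.FileInputFormat;
        import org.apache.hadoop.mapred.InputFormat;
        import org.apache.hadoop.mapred.InputSplit;
        import org.apache.hadoop.mapred.JobConf;
        import org.apache.hadoop.mapred.Reporter;

        public class OrcColumnPruningSketch {
          public static void main(String[] args) throws Exception {
            // Path to an ORC file written as in testInOutFormat above.
            Path orcFile = new Path(args[0]);
            JobConf conf = new JobConf();
            // Materialize only column 0 ("x"); unread columns come back as null.
            conf.set("hive.io.file.readcolumn.ids", "0");
            InputFormat<?, ?> in = new OrcInputFormat();
            FileInputFormat.setInputPaths(conf, orcFile.toString());
            InputSplit[] splits = in.getSplits(conf, 1);
            org.apache.hadoop.mapred.RecordReader reader =
                in.getRecordReader(splits[0], conf, Reporter.NULL);
            Object key = reader.createKey();
            Object value = reader.createValue();
            int rows = 0;
            while (reader.next(key, value)) {
              rows += 1;                  // each value is one ORC row (an OrcStruct)
            }
            reader.close();
            System.out.println("rows: " + rows);
          }
        }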

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,896 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.io.ByteWritable;
    +import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    +import org.apache.hadoop.hive.serde2.io.ShortWritable;
    +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
    +import org.apache.hadoop.io.BooleanWritable;
    +import org.apache.hadoop.io.BytesWritable;
    +import org.apache.hadoop.io.FloatWritable;
    +import org.apache.hadoop.io.IntWritable;
    +import org.apache.hadoop.io.LongWritable;
    +import org.apache.hadoop.io.Text;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +import java.io.File;
    +import java.nio.ByteBuffer;
    +import java.sql.Timestamp;
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Random;
    +
    +import static junit.framework.Assert.*;
    +import static junit.framework.Assert.assertEquals;
    +
    +/**
    + * Tests for the top level reader/streamFactory of ORC files.
    + */
    +public class TestOrcFile {
    +
    + public static class InnerStruct {
    + int int1;
    + Text string1 = new Text();
    + InnerStruct(int int1, String string1) {
    + this.int1 = int1;
    + this.string1.set(string1);
    + }
    + }
    +
    + public static class MiddleStruct {
    + List<InnerStruct> list = new ArrayList<InnerStruct>();
    +
    + MiddleStruct(InnerStruct... items) {
    + list.clear();
    + for(InnerStruct item: items) {
    + list.add(item);
    + }
    + }
    + }
    +
    + public static class BigRow {
    + Boolean boolean1;
    + Byte byte1;
    + Short short1;
    + Integer int1;
    + Long long1;
    + Float float1;
    + Double double1;
    + BytesWritable bytes1;
    + Text string1;
    + MiddleStruct middle;
    + List<InnerStruct> list = new ArrayList<InnerStruct>();
    + Map<Text, InnerStruct> map = new HashMap<Text, InnerStruct>();
    +
    + BigRow(Boolean b1, Byte b2, Short s1, Integer i1, Long l1, Float f1,
    + Double d1,
    + BytesWritable b3, String s2, MiddleStruct m1,
    + List<InnerStruct> l2, Map<Text, InnerStruct> m2) {
    + this.boolean1 = b1;
    + this.byte1 = b2;
    + this.short1 = s1;
    + this.int1 = i1;
    + this.long1 = l1;
    + this.float1 = f1;
    + this.double1 = d1;
    + this.bytes1 = b3;
    + if (s2 == null) {
    + this.string1 = null;
    + } else {
    + this.string1 = new Text(s2);
    + }
    + this.middle = m1;
    + this.list = l2;
    + this.map = m2;
    + }
    + }
    +
    + private static InnerStruct inner(int i, String s) {
    + return new InnerStruct(i, s);
    + }
    +
    + private static Map<Text, InnerStruct> map(InnerStruct... items) {
    + Map<Text, InnerStruct> result = new HashMap<Text, InnerStruct>();
    + for(InnerStruct i: items) {
    + result.put(new Text(i.string1), i);
    + }
    + return result;
    + }
    +
    + private static List<InnerStruct> list(InnerStruct... items) {
    + List<InnerStruct> result = new ArrayList<InnerStruct>();
    + for(InnerStruct s: items) {
    + result.add(s);
    + }
    + return result;
    + }
    +
    + private static BytesWritable bytes(int... items) {
    + BytesWritable result = new BytesWritable();
    + result.setSize(items.length);
    + for(int i=0; i < items.length; ++i) {
    + result.getBytes()[i] = (byte) items[i];
    + }
    + return result;
    + }
    +
    + private static ByteBuffer byteBuf(int... items) {
    + ByteBuffer result = ByteBuffer.allocate(items.length);
    + for(int item: items) {
    + result.put((byte) item);
    + }
    + return result;
    + }
    +
    + Path workDir = new Path(System.getProperty("test.tmp.dir",
    + "target" + File.separator + "test" + File.separator + "tmp"));
    +
    + Configuration conf;
    + FileSystem fs;
    + Path testFilePath;
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new Configuration();
    + fs = FileSystem.getLocal(conf);
    + testFilePath = new Path(workDir, "TestOrcFile." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + @Test
    + public void test1() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 100000, CompressionKind.ZLIB, 10000, 10000);
    + writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
    + Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
    + new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
    + list(inner(3, "good"), inner(4, "bad")),
    + map()));
    + writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
    + Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
    + new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
    + list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
    + map(inner(5,"chani"), inner(1,"mauddib"))));
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    +
    + // check the stats
    + ColumnStatistics[] stats = reader.getStatistics();
    + assertEquals(2, stats[1].getNumberOfValues());
    + assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    + assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    + assertEquals("count: 2 true: 1", stats[1].toString());
    +
    + assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    + assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    + assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    + assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
    + assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
    + stats[3].toString());
    +
    + assertEquals(Long.MAX_VALUE,
    + ((IntegerColumnStatistics) stats[5]).getMaximum());
    + assertEquals(Long.MAX_VALUE,
    + ((IntegerColumnStatistics) stats[5]).getMinimum());
    + assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
    + assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
    + stats[5].toString());
    +
    + assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    + assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    + assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    + assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
    + stats[7].toString());
    +
    + assertEquals("count: 2 min: bye max: hi", stats[9].toString());
    +
    + // check the inspectors
    + StructObjectInspector readerInspector =
    + (StructObjectInspector) reader.getObjectInspector();
    + assertEquals(ObjectInspector.Category.STRUCT,
    + readerInspector.getCategory());
    + assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
    + + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
    + + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
    + + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
    + + "map:map<string,struct<int1:int,string1:string>>>",
    + readerInspector.getTypeName());
    + List<? extends StructField> fields =
    + readerInspector.getAllStructFieldRefs();
    + BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
    + getStructFieldRef("boolean1").getFieldObjectInspector();
    + ByteObjectInspector by = (ByteObjectInspector) readerInspector.
    + getStructFieldRef("byte1").getFieldObjectInspector();
    + ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
    + getStructFieldRef("short1").getFieldObjectInspector();
    + IntObjectInspector in = (IntObjectInspector) readerInspector.
    + getStructFieldRef("int1").getFieldObjectInspector();
    + LongObjectInspector lo = (LongObjectInspector) readerInspector.
    + getStructFieldRef("long1").getFieldObjectInspector();
    + FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
    + getStructFieldRef("float1").getFieldObjectInspector();
    + DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
    + getStructFieldRef("double1").getFieldObjectInspector();
    + BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
    + getStructFieldRef("bytes1").getFieldObjectInspector();
    + StringObjectInspector st = (StringObjectInspector) readerInspector.
    + getStructFieldRef("string1").getFieldObjectInspector();
    + StructObjectInspector mid = (StructObjectInspector) readerInspector.
    + getStructFieldRef("middle").getFieldObjectInspector();
    + List<? extends StructField> midFields =
    + mid.getAllStructFieldRefs();
    + ListObjectInspector midli =
    + (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    + StructObjectInspector inner = (StructObjectInspector)
    + midli.getListElementObjectInspector();
    + List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    + ListObjectInspector li = (ListObjectInspector) readerInspector.
    + getStructFieldRef("list").getFieldObjectInspector();
    + MapObjectInspector ma = (MapObjectInspector) readerInspector.
    + getStructFieldRef("map").getFieldObjectInspector();
    + StructObjectInspector lc = (StructObjectInspector)
    + li.getListElementObjectInspector();
    + StringObjectInspector mk = (StringObjectInspector)
    + ma.getMapKeyObjectInspector();
    + StructObjectInspector mv = (StructObjectInspector)
    + ma.getMapValueObjectInspector();
    + RecordReader rows = reader.rows(null);
    + Object row = rows.next(null);
    + assertNotNull(row);
    + // check the contents of the first row
    + assertEquals(false,
    + bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    + assertEquals(1, by.get(readerInspector.getStructFieldData(row,
    + fields.get(1))));
    + assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
    + fields.get(2))));
    + assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
    + fields.get(3))));
    + assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
    + getStructFieldData(row, fields.get(4))));
    + assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
    + fields.get(5))), 0.00001);
    + assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
    + fields.get(6))), 0.00001);
    + assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
    + readerInspector.getStructFieldData(row, fields.get(7))));
    + assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
    + getStructFieldData(row, fields.get(8))));
    + List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
    + getStructFieldData(row, fields.get(9)), midFields.get(0)));
    + assertNotNull(midRow);
    + assertEquals(2, midRow.size());
    + assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
    + inFields.get(0))));
    + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (midRow.get(0), inFields.get(1))));
    + assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
    + inFields.get(0))));
    + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (midRow.get(1), inFields.get(1))));
    + List<?> list = li.getList(readerInspector.getStructFieldData(row,
    + fields.get(10)));
    + assertEquals(2, list.size());
    + assertEquals(3, in.get(inner.getStructFieldData(list.get(0),
    + inFields.get(0))));
    + assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (list.get(0), inFields.get(1))));
    + assertEquals(4, in.get(inner.getStructFieldData(list.get(1),
    + inFields.get(0))));
    + assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (list.get(1), inFields.get(1))));
    + Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
    + fields.get(11)));
    + assertEquals(0, map.size());
    +
    + // check the contents of the second row
    + assertEquals(true, rows.hasNext());
    + row = rows.next(row);
    + assertEquals(true,
    + bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    + assertEquals(100, by.get(readerInspector.getStructFieldData(row,
    + fields.get(1))));
    + assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
    + fields.get(2))));
    + assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
    + fields.get(3))));
    + assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
    + getStructFieldData(row, fields.get(4))));
    + assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
    + fields.get(5))), 0.00001);
    + assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
    + fields.get(6))), 0.00001);
    + assertEquals(bytes(), bi.getPrimitiveWritableObject(
    + readerInspector.getStructFieldData(row, fields.get(7))));
    + assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
    + getStructFieldData(row, fields.get(8))));
    + midRow = midli.getList(mid.getStructFieldData(readerInspector.
    + getStructFieldData(row, fields.get(9)), midFields.get(0)));
    + assertNotNull(midRow);
    + assertEquals(2, midRow.size());
    + assertEquals(1, in.get(inner.getStructFieldData(midRow.get(0),
    + inFields.get(0))));
    + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (midRow.get(0), inFields.get(1))));
    + assertEquals(2, in.get(inner.getStructFieldData(midRow.get(1),
    + inFields.get(0))));
    + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (midRow.get(1), inFields.get(1))));
    + list = li.getList(readerInspector.getStructFieldData(row,
    + fields.get(10)));
    + assertEquals(3, list.size());
    + assertEquals(100000000, in.get(inner.getStructFieldData(list.get(0),
    + inFields.get(0))));
    + assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (list.get(0), inFields.get(1))));
    + assertEquals(-100000, in.get(inner.getStructFieldData(list.get(1),
    + inFields.get(0))));
    + assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (list.get(1), inFields.get(1))));
    + assertEquals(1234, in.get(inner.getStructFieldData(list.get(2),
    + inFields.get(0))));
    + assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData
    + (list.get(2), inFields.get(1))));
    + map = ma.getMap(readerInspector.getStructFieldData(row,
    + fields.get(11)));
    + assertEquals(2, map.size());
    + boolean[] found = new boolean[2];
    + for(Object key: map.keySet()) {
    + String str = mk.getPrimitiveJavaObject(key);
    + if (str.equals("chani")) {
    + assertEquals(false, found[0]);
    + assertEquals(5, in.get(inner.getStructFieldData(map.get(key),
    + inFields.get(0))));
    + assertEquals(str, st.getPrimitiveJavaObject(
    + inner.getStructFieldData(map.get(key), inFields.get(1))));
    + found[0] = true;
    + } else if (str.equals("mauddib")) {
    + assertEquals(false, found[1]);
    + assertEquals(1, in.get(inner.getStructFieldData(map.get(key),
    + inFields.get(0))));
    + assertEquals(str, st.getPrimitiveJavaObject(
    + inner.getStructFieldData(map.get(key), inFields.get(1))));
    + found[1] = true;
    + } else {
    + throw new IllegalArgumentException("Unknown key " + str);
    + }
    + }
    + assertEquals(true, found[0]);
    + assertEquals(true, found[1]);
    +
    + // make sure there are no more rows and close the reader
    + assertEquals(false, rows.hasNext());
    + rows.close();
    + }
    +
    + @Test
    + public void columnProjection() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (InnerStruct.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 1000, CompressionKind.NONE, 100, 1000);
    + Random r1 = new Random(1);
    + Random r2 = new Random(2);
    + int x;
    + int minInt=0, maxInt=0;
    + String y;
    + String minStr = null, maxStr = null;
    + for(int i=0; i < 21000; ++i) {
    + x = r1.nextInt();
    + y = Long.toHexString(r2.nextLong());
    + if (i == 0 || x < minInt) {
    + minInt = x;
    + }
    + if (i == 0 || x > maxInt) {
    + maxInt = x;
    + }
    + if (i == 0 || y.compareTo(minStr) < 0) {
    + minStr = y;
    + }
    + if (i == 0 || y.compareTo(maxStr) > 0) {
    + maxStr = y;
    + }
    + writer.addRow(inner(x, y));
    + }
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    +
    + // check out the statistics
    + ColumnStatistics[] stats = reader.getStatistics();
    + assertEquals(3, stats.length);
    + for(ColumnStatistics s: stats) {
    + assertEquals(21000, s.getNumberOfValues());
    + if (s instanceof IntegerColumnStatistics) {
    + assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
    + assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
    + } else if (s instanceof StringColumnStatistics) {
    + assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
    + assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
    + }
    + }
    +
    + // check out the types
    + List<OrcProto.Type> types = reader.getTypes();
    + assertEquals(3, types.size());
    + assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
    + assertEquals(2, types.get(0).getSubtypesCount());
    + assertEquals(1, types.get(0).getSubtypes(0));
    + assertEquals(2, types.get(0).getSubtypes(1));
    + assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
    + assertEquals(0, types.get(1).getSubtypesCount());
    + assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
    + assertEquals(0, types.get(2).getSubtypesCount());
    +
    + // read the contents and make sure they match
    + RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
    + RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
    + r1 = new Random(1);
    + r2 = new Random(2);
    + OrcStruct row1 = null;
    + OrcStruct row2 = null;
    + for(int i = 0; i < 21000; ++i) {
    + assertEquals(true, rows1.hasNext());
    + assertEquals(true, rows2.hasNext());
    + row1 = (OrcStruct) rows1.next(row1);
    + row2 = (OrcStruct) rows2.next(row2);
    + assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
    + assertEquals(Long.toHexString(r2.nextLong()),
    + row2.getFieldValue(1).toString());
    + }
    + assertEquals(false, rows1.hasNext());
    + assertEquals(false, rows2.hasNext());
    + rows1.close();
    + rows2.close();
    + }
    +
    + @Test
    + public void emptyFile() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 1000, CompressionKind.NONE, 100, 10000);
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + assertEquals(false, reader.rows(null).hasNext());
    + assertEquals(CompressionKind.NONE, reader.getCompression());
    + assertEquals(0, reader.getNumberOfRows());
    + assertEquals(0, reader.getCompressionSize());
    + assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    + assertEquals(3, reader.getContentLength());
    + assertEquals(false, reader.getStripes().iterator().hasNext());
    + }
    +
    + @Test
    + public void metaData() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 1000, CompressionKind.NONE, 100, 10000);
    + writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
    + writer.addUserMetadata("clobber", byteBuf(1,2,3));
    + writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
    + ByteBuffer bigBuf = ByteBuffer.allocate(40000);
    + Random random = new Random(0);
    + random.nextBytes(bigBuf.array());
    + writer.addUserMetadata("big", bigBuf);
    + bigBuf.position(0);
    + writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
    + 42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
    + null, null, null, null));
    + writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
    + assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
    + reader.getMetadataValue("my.meta"));
    + assertEquals(bigBuf, reader.getMetadataValue("big"));
    + try {
    + reader.getMetadataValue("unknown");
    + assertTrue(false);
    + } catch (IllegalArgumentException iae) {
    + // PASS
    + }
    + int i = 0;
    + for(String key: reader.getMetadataKeys()) {
    + if ("my.meta".equals(key) ||
    + "clobber".equals(key) ||
    + "big".equals(key)) {
    + i += 1;
    + } else {
    + throw new IllegalArgumentException("unknown key " + key);
    + }
    + }
    + assertEquals(3, i);
    + }
    +
    + /**
    + * We test union and timestamp separately since we need to make the
    + * object inspector manually. (The Hive reflection-based object
    + * inspectors don't handle them properly.)
    + */
    + @Test
    + public void testUnionAndTimestamp() throws Exception {
    + List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
    + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).
    + addFieldNames("time").addFieldNames("union").
    + addSubtypes(1).addSubtypes(2).build());
    + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).
    + build());
    + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).
    + addSubtypes(3).addSubtypes(4).build());
    + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).
    + build());
    + types.add(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).
    + build());
    +
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = OrcStruct.createObjectInspector(0, types);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 1000, CompressionKind.NONE, 100, 10000);
    + OrcStruct row = new OrcStruct(2);
    + OrcUnion union = new OrcUnion();
    + row.setFieldValue(1, union);
    + row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
    + union.set((byte) 0, new IntWritable(42));
    + writer.addRow(row);
    + row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
    + union.set((byte)1, new Text("hello"));
    + writer.addRow(row);
    + row.setFieldValue(0, null);
    + row.setFieldValue(1, null);
    + writer.addRow(row);
    + row.setFieldValue(1, union);
    + union.set((byte) 0, null);
    + writer.addRow(row);
    + union.set((byte) 1, null);
    + writer.addRow(row);
    + union.set((byte) 0, new IntWritable(200000));
    + row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
    + writer.addRow(row);
    + for(int i=1900; i < 2200; ++i) {
    + row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
    + if ((i & 1) == 0) {
    + union.set((byte) 0, new IntWritable(i*i));
    + } else {
    + union.set((byte) 1, new Text(new Integer(i*i).toString()));
    + }
    + writer.addRow(row);
    + }
    + // let's add a lot of constant rows to test the rle
    + row.setFieldValue(0, null);
    + union.set((byte) 0, new IntWritable(1732050807));
    + for(int i=0; i < 1000; ++i) {
    + writer.addRow(row);
    + }
    + union.set((byte) 0, new IntWritable(0));
    + writer.addRow(row);
    + union.set((byte) 0, new IntWritable(10));
    + writer.addRow(row);
    + union.set((byte) 0, new IntWritable(138));
    + writer.addRow(row);
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    + assertEquals(1309, reader.getNumberOfRows());
    + int stripeCount = 0;
    + int rowCount = 0;
    + long currentOffset = -1;
    + for(StripeInformation stripe: reader.getStripes()) {
    + stripeCount += 1;
    + rowCount += stripe.getNumberOfRows();
    + if (currentOffset < 0) {
    + currentOffset = stripe.getOffset() + stripe.getIndexLength() +
    + stripe.getDataLength() + stripe.getFooterLength();
    + } else {
    + assertEquals(currentOffset, stripe.getOffset());
    + currentOffset += stripe.getIndexLength() +
    + stripe.getDataLength() + stripe.getFooterLength();
    + }
    + }
    + assertEquals(reader.getNumberOfRows(), rowCount);
    + assertEquals(2, stripeCount);
    + assertEquals(reader.getContentLength(), currentOffset);
    + RecordReader rows = reader.rows(null);
    + assertEquals(0, rows.getRowNumber());
    + assertEquals(0.0, rows.getProgress(), 0.000001);
    + assertEquals(true, rows.hasNext());
    + row = (OrcStruct) rows.next(null);
    + inspector = reader.getObjectInspector();
    + assertEquals("struct<time:timestamp,union:union{int, string}>",
    + inspector.getTypeName());
    + assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
    + row.getFieldValue(0));
    + union = (OrcUnion) row.getFieldValue(1);
    + assertEquals(0, union.getTag());
    + assertEquals(new IntWritable(42), union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
    + row.getFieldValue(0));
    + assertEquals(1, union.getTag());
    + assertEquals(new Text("hello"), union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(null, row.getFieldValue(0));
    + assertEquals(null, row.getFieldValue(1));
    + row = (OrcStruct) rows.next(row);
    + assertEquals(null, row.getFieldValue(0));
    + union = (OrcUnion) row.getFieldValue(1);
    + assertEquals(0, union.getTag());
    + assertEquals(null, union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(null, row.getFieldValue(0));
    + assertEquals(1, union.getTag());
    + assertEquals(null, union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
    + row.getFieldValue(0));
    + assertEquals(new IntWritable(200000), union.getObject());
    + for(int i=1900; i < 2200; ++i) {
    + row = (OrcStruct) rows.next(row);
    + assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
    + row.getFieldValue(0));
    + if ((i & 1) == 0) {
    + assertEquals(0, union.getTag());
    + assertEquals(new IntWritable(i*i), union.getObject());
    + } else {
    + assertEquals(1, union.getTag());
    + assertEquals(new Text(new Integer(i*i).toString()), union.getObject());
    + }
    + }
    + for(int i=0; i < 1000; ++i) {
    + row = (OrcStruct) rows.next(row);
    + assertEquals(new IntWritable(1732050807), union.getObject());
    + }
    + row = (OrcStruct) rows.next(row);
    + assertEquals(new IntWritable(0), union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(new IntWritable(10), union.getObject());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(new IntWritable(138), union.getObject());
    + assertEquals(false, rows.hasNext());
    + assertEquals(1.0, rows.getProgress(), 0.00001);
    + assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
    + rows.close();
    + }
    +
    + /**
    + * Read and write a randomly generated snappy file.
    + * @throws Exception
    + */
    + @Test
    + public void testSnappy() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (InnerStruct.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 1000, CompressionKind.SNAPPY, 100, 10000);
    + Random rand = new Random(12);
    + for(int i=0; i < 10000; ++i) {
    + writer.addRow(new InnerStruct(rand.nextInt(),
    + Integer.toHexString(rand.nextInt())));
    + }
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + RecordReader rows = reader.rows(null);
    + rand = new Random(12);
    + OrcStruct row = null;
    + for(int i=0; i < 10000; ++i) {
    + assertEquals(true, rows.hasNext());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(rand.nextInt(), ((IntWritable) row.getFieldValue(0)).get());
    + assertEquals(Integer.toHexString(rand.nextInt()),
    + row.getFieldValue(1).toString());
    + }
    + assertEquals(false, rows.hasNext());
    + rows.close();
    + }
    +
    + /**
    + * Read and write a randomly generated Snappy file with the row index
    + * disabled (a row index stride of zero).
    + * @throws Exception
    + */
    + @Test
    + public void testWithoutIndex() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (InnerStruct.class,
    + ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 5000, CompressionKind.SNAPPY, 1000, 0);
    + Random rand = new Random(24);
    + for(int i=0; i < 10000; ++i) {
    + InnerStruct row = new InnerStruct(rand.nextInt(),
    + Integer.toBinaryString(rand.nextInt()));
    + for(int j=0; j< 5; ++j) {
    + writer.addRow(row);
    + }
    + }
    + writer.close();
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + assertEquals(50000, reader.getNumberOfRows());
    + assertEquals(0, reader.getRowIndexStride());
    + StripeInformation stripe = reader.getStripes().iterator().next();
    + assertEquals(true, stripe.getDataLength() != 0);
    + assertEquals(0, stripe.getIndexLength());
    + RecordReader rows = reader.rows(null);
    + rand = new Random(24);
    + OrcStruct row = null;
    + for(int i=0; i < 10000; ++i) {
    + int intVal = rand.nextInt();
    + String strVal = Integer.toBinaryString(rand.nextInt());
    + for(int j=0; j < 5; ++j) {
    + assertEquals(true, rows.hasNext());
    + row = (OrcStruct) rows.next(row);
    + assertEquals(intVal, ((IntWritable) row.getFieldValue(0)).get());
    + assertEquals(strVal, row.getFieldValue(1).toString());
    + }
    + }
    + assertEquals(false, rows.hasNext());
    + rows.close();
    + }
    +
    + @Test
    + public void testSeek() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 200000, CompressionKind.ZLIB, 65536, 1000);
    + Random rand = new Random(42);
    + final int COUNT=32768;
    + long[] intValues= new long[COUNT];
    + double[] doubleValues = new double[COUNT];
    + String[] stringValues = new String[COUNT];
    + BytesWritable[] byteValues = new BytesWritable[COUNT];
    + String[] words = new String[128];
    + for(int i=0; i < words.length; ++i) {
    + words[i] = Integer.toHexString(rand.nextInt());
    + }
    + for(int i=0; i < COUNT/2; ++i) {
    + intValues[2*i] = rand.nextLong();
    + intValues[2*i+1] = intValues[2*i];
    + stringValues[2*i] = words[rand.nextInt(words.length)];
    + stringValues[2*i+1] = stringValues[2*i];
    + }
    + for(int i=0; i < COUNT; ++i) {
    + doubleValues[i] = rand.nextDouble();
    + byte[] buf = new byte[20];
    + rand.nextBytes(buf);
    + byteValues[i] = new BytesWritable(buf);
    + }
    + for(int i=0; i < COUNT; ++i) {
    + writer.addRow(createRandomRow(intValues, doubleValues, stringValues,
    + byteValues, words, i));
    + }
    + writer.close();
    + writer = null;
    + Reader reader = OrcFile.createReader(fs, testFilePath);
    + assertEquals(COUNT, reader.getNumberOfRows());
    + RecordReader rows = reader.rows(null);
    + OrcStruct row = null;
    + for(int i=COUNT-1; i >= 0; --i) {
    + rows.seekToRow(i);
    + row = (OrcStruct) rows.next(row);
    + BigRow expected = createRandomRow(intValues, doubleValues,
    + stringValues, byteValues, words, i);
    + assertEquals(expected.boolean1.booleanValue(),
    + ((BooleanWritable) row.getFieldValue(0)).get());
    + assertEquals(expected.byte1.byteValue(),
    + ((ByteWritable) row.getFieldValue(1)).get());
    + assertEquals(expected.short1.shortValue(),
    + ((ShortWritable) row.getFieldValue(2)).get());
    + assertEquals(expected.int1.intValue(),
    + ((IntWritable) row.getFieldValue(3)).get());
    + assertEquals(expected.long1.longValue(),
    + ((LongWritable) row.getFieldValue(4)).get());
    + assertEquals(expected.float1.floatValue(),
    + ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
    + assertEquals(expected.double1.doubleValue(),
    + ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
    + assertEquals(expected.bytes1, row.getFieldValue(7));
    + assertEquals(expected.string1, row.getFieldValue(8));
    + List<InnerStruct> expectedList = expected.middle.list;
    + List<OrcStruct> actualList =
    + (List) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
    + compareList(expectedList, actualList);
    + compareList(expected.list, (List) row.getFieldValue(10));
    + }
    + }
    +
    + private void compareInner(InnerStruct expect,
    + OrcStruct actual) throws Exception {
    + if (expect == null || actual == null) {
    + assertEquals(expect, actual);
    + } else {
    + assertEquals(expect.int1, ((IntWritable) actual.getFieldValue(0)).get());
    + assertEquals(expect.string1, actual.getFieldValue(1));
    + }
    + }
    +
    + private void compareList(List<InnerStruct> expect,
    + List<OrcStruct> actual) throws Exception {
    + assertEquals(expect.size(), actual.size());
    + for(int j=0; j < expect.size(); ++j) {
    + compareInner(expect.get(j), actual.get(j));
    + }
    + }
    +
    + private BigRow createRandomRow(long[] intValues, double[] doubleValues,
    + String[] stringValues,
    + BytesWritable[] byteValues,
    + String[] words, int i) {
    + InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
    + InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
    + words[i % words.length] + "-x");
    + return new BigRow((intValues[i] & 1) == 0, (byte) intValues[i],
    + (short) intValues[i], (int) intValues[i], intValues[i],
    + (float) doubleValues[i], doubleValues[i], byteValues[i],stringValues[i],
    + new MiddleStruct(inner, inner2), list(), map(inner,inner2));
    + }
    +}
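
The tests above all follow the same write/read shape. Below is a minimal round-trip sketch distilled from the calls they make (not a definitive recipe): the local-filesystem path, the visibility of TestOrcFile's InnerStruct helper class, and the pre-release createWriter/createReader signatures exactly as exercised above are all assumptions.

    // Hedged sketch only: mirrors the calls made by testSnappy and testSeek above.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.io.IntWritable;

    public class OrcRoundTripSketch {
      public static void main(String[] args) throws Exception {
        // hypothetical local-filesystem destination
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path path = new Path(System.getProperty("java.io.tmpdir"), "orc-sketch.orc");
        // the tests wrap this call in a synchronized block; InnerStruct is the
        // row class from TestOrcFile, assumed visible to this sketch
        ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
            InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        // stripe size, compression, buffer size, row index stride (testSeek's values)
        Writer writer = OrcFile.createWriter(fs, path, inspector,
            200000, CompressionKind.ZLIB, 65536, 1000);
        for (int i = 0; i < 100; ++i) {
          writer.addRow(new InnerStruct(i, Integer.toHexString(i)));
        }
        writer.close();

        Reader reader = OrcFile.createReader(fs, path);
        RecordReader rows = reader.rows(null);     // null selects every column
        OrcStruct row = null;
        while (rows.hasNext()) {
          row = (OrcStruct) rows.next(row);        // the row object is reused
          int x = ((IntWritable) row.getFieldValue(0)).get();
          String y = row.getFieldValue(1).toString();
        }
        rows.close();
      }
    }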

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcStruct.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,131 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    +import org.junit.Test;
    +
    +import java.util.ArrayList;
    +import java.util.Arrays;
    +import java.util.Collections;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public class TestOrcStruct {
    +
    + @Test
    + public void testStruct() throws Exception {
    + OrcStruct st1 = new OrcStruct(4);
    + OrcStruct st2 = new OrcStruct(4);
    + OrcStruct st3 = new OrcStruct(3);
    + st1.setFieldValue(0, "hop");
    + st1.setFieldValue(1, "on");
    + st1.setFieldValue(2, "pop");
    + st1.setFieldValue(3, 42);
    + assertEquals(false, st1.equals(null));
    + st2.setFieldValue(0, "hop");
    + st2.setFieldValue(1, "on");
    + st2.setFieldValue(2, "pop");
    + st2.setFieldValue(3, 42);
    + assertEquals(st1, st2);
    + st3.setFieldValue(0, "hop");
    + st3.setFieldValue(1, "on");
    + st3.setFieldValue(2, "pop");
    + assertEquals(false, st1.equals(st3));
    + assertEquals(11241, st1.hashCode());
    + assertEquals(st1.hashCode(), st2.hashCode());
    + assertEquals(11204, st3.hashCode());
    + assertEquals("{hop, on, pop, 42}", st1.toString());
    + st1.setFieldValue(3, null);
    + assertEquals(false, st1.equals(st2));
    + assertEquals(false, st2.equals(st1));
    + st2.setFieldValue(3, null);
    + assertEquals(st1, st2);
    + }
    +
    + @Test
    + public void testInspectorFromTypeInfo() throws Exception {
    + TypeInfo typeInfo =
    + TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
    + ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
    + "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
    + ",c13:array<timestamp>>");
    + StructObjectInspector inspector = (StructObjectInspector)
    + OrcStruct.createObjectInspector(typeInfo);
    + assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
    + "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
    + "c1:int>,c11:map<int,int>,c12:union{int},c13:array<timestamp>>",
    + inspector.getTypeName());
    + assertEquals(null,
    + inspector.getAllStructFieldRefs().get(0).getFieldComment());
    + assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
    + OrcStruct s1 = new OrcStruct(13);
    + for(int i=0; i < 13; ++i) {
    + s1.setFieldValue(i, i);
    + }
    +
    + List<Object> list = new ArrayList<Object>();
    + list.addAll(Arrays.asList(0,1,2,3,4,5,6,7,8,9,10,11,12));
    + assertEquals(list, inspector.getStructFieldsDataAsList(s1));
    + ListObjectInspector listOI = (ListObjectInspector)
    + inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector();
    + assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
    + assertEquals(10, listOI.getListElement(list, 10));
    + assertEquals(13, listOI.getListLength(list));
    +
    + Map<Integer, Integer> map = new HashMap<Integer,Integer>();
    + map.put(1,2);
    + map.put(2,4);
    + map.put(3,6);
    + MapObjectInspector mapOI = (MapObjectInspector)
    + inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector();
    + assertEquals(3, mapOI.getMapSize(map));
    + assertEquals(4, mapOI.getMapValueElement(map, 2));
    + }
    +
    + @Test
    + public void testUnion() throws Exception {
    + OrcUnion un1 = new OrcUnion();
    + OrcUnion un2 = new OrcUnion();
    + un1.set((byte) 0, "hi");
    + un2.set((byte) 0, "hi");
    + assertEquals(un1, un2);
    + assertEquals(un1.hashCode(), un2.hashCode());
    + un2.set((byte) 0, null);
    + assertEquals(false, un1.equals(un2));
    + assertEquals(false, un2.equals(un1));
    + un1.set((byte) 0, null);
    + assertEquals(un1, un2);
    + un2.set((byte) 0, "hi");
    + un1.set((byte) 1, "hi");
    + assertEquals(false, un1.equals(un2));
    + assertEquals(false, un1.hashCode() == un2.hashCode());
    + un2.set((byte) 1, "byte");
    + assertEquals(false, un1.equals(un2));
    + assertEquals("union(1, hi)", un1.toString());
    + assertEquals(false, un1.equals(null));
    + }
    +}
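
testInspectorFromTypeInfo above covers the other way to obtain an inspector: parse a Hive type string into a TypeInfo and hand it to OrcStruct.createObjectInspector, with no Java class to reflect on. A small hedged sketch of driving the writer that way follows; the caller-supplied fs/path and the IntWritable/Text field values (mirroring what testUnionAndTimestamp passes for int and string columns) are assumptions.

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;

    // hedged sketch; fs and path are supplied by the caller
    static void writeFromTypeString(FileSystem fs, Path path) throws Exception {
      TypeInfo typeInfo =
          TypeInfoUtils.getTypeInfoFromTypeString("struct<x:int,y:string>");
      ObjectInspector inspector = OrcStruct.createObjectInspector(typeInfo);
      Writer writer = OrcFile.createWriter(fs, path, inspector,
          1000, CompressionKind.NONE, 100, 10000);
      OrcStruct row = new OrcStruct(2);            // two fields: x and y
      row.setFieldValue(0, new IntWritable(7));    // int column takes an IntWritable
      row.setFieldValue(1, new Text("seven"));     // string column takes a Text
      writer.addRow(row);
      writer.close();
    }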

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthByteReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,142 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.nio.ByteBuffer;
    +
    +import static junit.framework.Assert.assertEquals;
    +
    +public class TestRunLengthByteReader {
    +
    + @Test
    + public void testUncompressedSeek() throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
    + null, collect));
    + TestInStream.PositionCollector[] positions =
    + new TestInStream.PositionCollector[2048];
    + for(int i=0; i < 2048; ++i) {
    + positions[i] = new TestInStream.PositionCollector();
    + out.getPosition(positions[i]);
    + if (i < 1024) {
    + out.write((byte) (i/4));
    + } else {
    + out.write((byte) i);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
    + inBuf, null, 100));
    + for(int i=0; i < 2048; ++i) {
    + int x = in.next() & 0xff;
    + if (i < 1024) {
    + assertEquals((i/4) & 0xff, x);
    + } else {
    + assertEquals(i & 0xff, x);
    + }
    + }
    + for(int i=2047; i >= 0; --i) {
    + in.seek(positions[i]);
    + int x = in.next() & 0xff;
    + if (i < 1024) {
    + assertEquals((i/4) & 0xff, x);
    + } else {
    + assertEquals(i & 0xff, x);
    + }
    + }
    + }
    +
    + @Test
    + public void testCompressedSeek() throws Exception {
    + CompressionCodec codec = new SnappyCodec();
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 500,
    + codec, collect));
    + TestInStream.PositionCollector[] positions =
    + new TestInStream.PositionCollector[2048];
    + for(int i=0; i < 2048; ++i) {
    + positions[i] = new TestInStream.PositionCollector();
    + out.getPosition(positions[i]);
    + if (i < 1024) {
    + out.write((byte) (i/4));
    + } else {
    + out.write((byte) i);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
    + inBuf, codec, 500));
    + for(int i=0; i < 2048; ++i) {
    + int x = in.next() & 0xff;
    + if (i < 1024) {
    + assertEquals((i/4) & 0xff, x);
    + } else {
    + assertEquals(i & 0xff, x);
    + }
    + }
    + for(int i=2047; i >= 0; --i) {
    + in.seek(positions[i]);
    + int x = in.next() & 0xff;
    + if (i < 1024) {
    + assertEquals((i/4) & 0xff, x);
    + } else {
    + assertEquals(i & 0xff, x);
    + }
    + }
    + }
    +
    + @Test
    + public void testSkips() throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + RunLengthByteWriter out = new RunLengthByteWriter(new OutStream("test", 100,
    + null, collect));
    + for(int i=0; i < 2048; ++i) {
    + if (i < 1024) {
    + out.write((byte) (i/16));
    + } else {
    + out.write((byte) i);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + RunLengthByteReader in = new RunLengthByteReader(InStream.create("test",
    + inBuf, null, 100));
    + for(int i=0; i < 2048; i += 10) {
    + int x = in.next() & 0xff;
    + if (i < 1024) {
    + assertEquals((i/16) & 0xff, x);
    + } else {
    + assertEquals(i & 0xff, x);
    + }
    + if (i < 2038) {
    + in.skip(9);
    + }
    + in.skip(0);
    + }
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRunLengthIntegerReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,122 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.nio.ByteBuffer;
    +import java.util.Random;
    +
    +import static junit.framework.Assert.assertEquals;
    +
    +public class TestRunLengthIntegerReader {
    +
    + public void runSeekTest(CompressionCodec codec) throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + RunLengthIntegerWriter out = new RunLengthIntegerWriter(
    + new OutStream("test", 1000, codec, collect), true);
    + TestInStream.PositionCollector[] positions =
    + new TestInStream.PositionCollector[4096];
    + Random random = new Random(99);
    + int[] junk = new int[2048];
    + for(int i=0; i < junk.length; ++i) {
    + junk[i] = random.nextInt();
    + }
    + for(int i=0; i < 4096; ++i) {
    + positions[i] = new TestInStream.PositionCollector();
    + out.getPosition(positions[i]);
    + // test runs, incrementing runs, non-runs
    + if (i < 1024) {
    + out.write(i/4);
    + } else if (i < 2048) {
    + out.write(2*i);
    + } else {
    + out.write(junk[i-2048]);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
    + ("test", inBuf, codec, 1000), true);
    + for(int i=0; i < 2048; ++i) {
    + int x = (int) in.next();
    + if (i < 1024) {
    + assertEquals(i/4, x);
    + } else if (i < 2048) {
    + assertEquals(2*i, x);
    + } else {
    + assertEquals(junk[i-2048], x);
    + }
    + }
    + for(int i=2047; i >= 0; --i) {
    + in.seek(positions[i]);
    + int x = (int) in.next();
    + if (i < 1024) {
    + assertEquals(i/4, x);
    + } else if (i < 2048) {
    + assertEquals(2*i, x);
    + } else {
    + assertEquals(junk[i-2048], x);
    + }
    + }
    + }
    +
    + @Test
    + public void testUncompressedSeek() throws Exception {
    + runSeekTest(null);
    + }
    +
    + @Test
    + public void testCompressedSeek() throws Exception {
    + runSeekTest(new ZlibCodec());
    + }
    +
    + @Test
    + public void testSkips() throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + RunLengthIntegerWriter out = new RunLengthIntegerWriter(
    + new OutStream("test", 100, null, collect), true);
    + for(int i=0; i < 2048; ++i) {
    + if (i < 1024) {
    + out.write(i);
    + } else {
    + out.write(256 * i);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
    + ("test", inBuf, null, 100), true);
    + for(int i=0; i < 2048; i += 10) {
    + int x = (int) in.next();
    + if (i < 1024) {
    + assertEquals(i, x);
    + } else {
    + assertEquals(256 * i, x);
    + }
    + if (i < 2038) {
    + in.skip(9);
    + }
    + in.skip(0);
    + }
    + }
    +}
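
runSeekTest above is the general recipe for random access in the ORC streams: ask the writer for a position before each value, then feed that position back to the matching reader. A condensed sketch of the pattern follows; it assumes, as the tests do, the org.apache.hadoop.hive.ql.io.orc package and the TestInStream.OutputCollector / TestInStream.PositionCollector harness classes.

    import java.nio.ByteBuffer;

    // hedged sketch of the record-position-then-seek pattern used above
    static void seekSketch() throws Exception {
      TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
      RunLengthIntegerWriter out = new RunLengthIntegerWriter(
          new OutStream("test", 1000, null, collect), true);   // true = signed
      TestInStream.PositionCollector[] positions =
          new TestInStream.PositionCollector[100];
      for (int i = 0; i < 100; ++i) {
        positions[i] = new TestInStream.PositionCollector();
        out.getPosition(positions[i]);   // remember where value i starts
        out.write(i * 3);
      }
      out.flush();

      // wrap the collected bytes in an InStream and jump straight to value 42
      ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
      collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
      inBuf.flip();
      RunLengthIntegerReader in = new RunLengthIntegerReader(
          InStream.create("test", inBuf, null, 1000), true);
      in.seek(positions[42]);
      long value = in.next();   // 126, the value recorded at position 42
      in.skip(10);              // then skip the next ten values
    }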

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestSerializationUtils.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,37 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.io.ByteArrayInputStream;
    +import java.io.ByteArrayOutputStream;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public class TestSerializationUtils {
    +
    + @Test
    + public void TestDoubles() throws Exception {
    + ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    + SerializationUtils.writeDouble(buffer, 1343822337.759);
    + assertEquals(1343822337.759,
    + SerializationUtils.readDouble(new
    + ByteArrayInputStream(buffer.toByteArray())), 0.0001);
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStreamName.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,48 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public class TestStreamName {
    +
    + @Test
    + public void test1() throws Exception {
    + StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
    + StreamName s2 = new StreamName(3,
    + OrcProto.Stream.Kind.DICTIONARY_DATA);
    + StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
    + StreamName s4 = new StreamName(5,
    + OrcProto.Stream.Kind.DICTIONARY_DATA);
    + StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
    + assertEquals(true, s1.equals(s1));
    + assertEquals(false, s1.equals(s2));
    + assertEquals(false, s1.equals(s3));
    + assertEquals(true, s1.equals(s1p));
    + assertEquals(true, s1.compareTo(null) < 0);
    + assertEquals(false, s1.equals(null));
    + assertEquals(true, s1.compareTo(s2) < 0);
    + assertEquals(true, s2.compareTo(s3) < 0);
    + assertEquals(true, s3.compareTo(s4) < 0);
    + assertEquals(true, s4.compareTo(s1p) > 0);
    + assertEquals(0, s1p.compareTo(s1));
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringRedBlackTree.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,296 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.io.DataOutputBuffer;
    +import org.apache.hadoop.io.IntWritable;
    +import org.apache.hadoop.io.Text;
    +import org.junit.Test;
    +
    +import java.io.BufferedOutputStream;
    +import java.io.IOException;
    +
    +import static junit.framework.Assert.assertEquals;
    +
    +/**
    + * Test the red-black tree with string keys.
    + */
    +public class TestStringRedBlackTree {
    +
    + /**
    + * Checks the red-black tree rules to make sure that we have correctly built
    + * a valid tree.
    + *
    + * Properties:
    + * 1. Red nodes must have black children
    + * 2. Every path from a node to its NULL descendants must contain the same
    + * number of black nodes (the same black height on both sides).
    + *
    + * @param node The id of the root of the subtree to check for the red-black
    + * tree properties.
    + * @return The black-height of the subtree.
    + */
    + private int checkSubtree(RedBlackTree tree, int node, IntWritable count
    + ) throws IOException {
    + if (node == RedBlackTree.NULL) {
    + return 1;
    + }
    + count.set(count.get() + 1);
    + boolean is_red = tree.isRed(node);
    + int left = tree.getLeft(node);
    + int right = tree.getRight(node);
    + if (is_red) {
    + if (tree.isRed(left)) {
    + printTree(tree, "", tree.root);
    + throw new IllegalStateException("Left node of " + node + " is " + left +
    + " and both are red.");
    + }
    + if (tree.isRed(right)) {
    + printTree(tree, "", tree.root);
    + throw new IllegalStateException("Right node of " + node + " is " +
    + right + " and both are red.");
    + }
    + }
    + int left_depth = checkSubtree(tree, left, count);
    + int right_depth = checkSubtree(tree, right, count);
    + if (left_depth != right_depth) {
    + printTree(tree, "", tree.root);
    + throw new IllegalStateException("Lopsided tree at node " + node +
    + " with depths " + left_depth + " and " + right_depth);
    + }
    + if (is_red) {
    + return left_depth;
    + } else {
    + return left_depth + 1;
    + }
    + }
    +
    + /**
    + * Checks the validity of the entire tree. Also ensures that the number of
    + * nodes visited is the same as the size of the set.
    + */
    + void checkTree(RedBlackTree tree) throws IOException {
    + IntWritable count = new IntWritable(0);
    + if (tree.isRed(tree.root)) {
    + printTree(tree, "", tree.root);
    + throw new IllegalStateException("root is red");
    + }
    + checkSubtree(tree, tree.root, count);
    + if (count.get() != tree.size) {
    + printTree(tree, "", tree.root);
    + throw new IllegalStateException("Broken tree! visited= " + count.get() +
    + " size=" + tree.size);
    + }
    + }
    +
    + void printTree(RedBlackTree tree, String indent, int node
    + ) throws IOException {
    + if (node == RedBlackTree.NULL) {
    + System.err.println(indent + "NULL");
    + } else {
    + System.err.println(indent + "Node " + node + " color " +
    + (tree.isRed(node) ? "red" : "black") + " count " + tree.getCount(node));
    + printTree(tree, indent + " ", tree.getLeft(node));
    + printTree(tree, indent + " ", tree.getRight(node));
    + }
    + }
    +
    + private static class MyVisitor implements StringRedBlackTree.Visitor {
    + private final String[] words;
    + private final int[] counts;
    + private final int[] order;
    + private final DataOutputBuffer buffer = new DataOutputBuffer();
    + int current = 0;
    +
    + MyVisitor(String[] args, int[] counts, int[] order) {
    + words = args;
    + this.counts = counts;
    + this.order = order;
    + }
    +
    + @Override
    + public void visit(StringRedBlackTree.VisitorContext context
    + ) throws IOException {
    + String word = context.getText().toString();
    + assertEquals("in word " + current, words[current], word);
    + assertEquals("in word " + current, counts[current], context.getCount());
    + assertEquals("in word " + current, order[current],
    + context.getOriginalPosition());
    + buffer.reset();
    + context.writeBytes(buffer);
    + assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
    + current += 1;
    + }
    + }
    +
    + void checkContents(StringRedBlackTree tree, int[] counts, int[] order,
    + String... params
    + ) throws IOException {
    + tree.visit(new MyVisitor(params, counts, order));
    + }
    +
    + StringRedBlackTree buildTree(String... params) throws IOException {
    + StringRedBlackTree result = new StringRedBlackTree();
    + for(String word: params) {
    + result.add(word);
    + checkTree(result);
    + }
    + return result;
    + }
    +
    + @Test
    + public void test1() throws Exception {
    + StringRedBlackTree tree = new StringRedBlackTree(5);
    + assertEquals(0, tree.getByteSize());
    + checkTree(tree);
    + assertEquals(0, tree.add("owen"));
    + checkTree(tree);
    + assertEquals(1, tree.add("ashutosh"));
    + checkTree(tree);
    + assertEquals(0, tree.add("owen"));
    + checkTree(tree);
    + assertEquals(2, tree.add("alan"));
    + checkTree(tree);
    + assertEquals(2, tree.add("alan"));
    + checkTree(tree);
    + assertEquals(1, tree.add("ashutosh"));
    + checkTree(tree);
    + assertEquals(3, tree.add("greg"));
    + checkTree(tree);
    + assertEquals(4, tree.add("eric"));
    + checkTree(tree);
    + assertEquals(5, tree.add("arun"));
    + checkTree(tree);
    + assertEquals(6, tree.size());
    + checkTree(tree);
    + assertEquals(6, tree.add("eric14"));
    + checkTree(tree);
    + assertEquals(7, tree.add("o"));
    + checkTree(tree);
    + assertEquals(8, tree.add("ziggy"));
    + checkTree(tree);
    + assertEquals(9, tree.add("z"));
    + checkTree(tree);
    + checkContents(tree, new int[]{2,1,2,1,1,1,1,2,1,1},
    + new int[]{2,5,1,4,6,3,7,0,9,8},
    + "alan", "arun", "ashutosh", "eric", "eric14", "greg",
    + "o", "owen", "z", "ziggy");
    + assertEquals(10*5*4 + 8 + 6 + 5 + 5 * 4 + 2 * 1, tree.getByteSize());
    + // check that adding greg again bumps the count
    + assertEquals(1, tree.getCount(3));
    + assertEquals(3, tree.add("greg"));
    + assertEquals(2, tree.getCount(3));
    + assertEquals(41, tree.getCharacterSize());
    + // add some more strings to test the different branches of the
    + // rebalancing
    + assertEquals(10, tree.add("zak"));
    + checkTree(tree);
    + assertEquals(11, tree.add("eric1"));
    + checkTree(tree);
    + assertEquals(12, tree.add("ash"));
    + checkTree(tree);
    + assertEquals(13, tree.add("harry"));
    + checkTree(tree);
    + assertEquals(14, tree.add("john"));
    + checkTree(tree);
    + tree.clear();
    + checkTree(tree);
    + assertEquals(0, tree.getByteSize());
    + assertEquals(0, tree.getCharacterSize());
    + }
    +
    + @Test
    + public void test2() throws Exception {
    + StringRedBlackTree tree =
    + buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
    + "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
    + assertEquals(26, tree.size());
    + checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
    + 1,1,1, 1,1,1, 1,1}, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
    + 15,16,17, 18,19,20, 21,22,23, 24,25},
    + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
    + "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
    + }
    +
    + @Test
    + public void test3() throws Exception {
    + StringRedBlackTree tree =
    + buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
    + "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
    + assertEquals(26, tree.size());
    + checkContents(tree, new int[]{1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 1,1,1,
    + 1,1,1, 1,1,1, 1,1}, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
    + 13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
    + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    + "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
    + }
    +
    + public static void main(String[] args) throws Exception {
    + TestStringRedBlackTree test = new TestStringRedBlackTree();
    + test.test1();
    + test.test2();
    + test.test3();
    + TestSerializationUtils serUtils = new TestSerializationUtils();
    + serUtils.TestDoubles();
    + TestDynamicArray test6 = new TestDynamicArray();
    + test6.testByteArray();
    + test6.testIntArray();
    + TestZlib zlib = new TestZlib();
    + zlib.testCorrupt();
    + zlib.testNoOverflow();
    + TestInStream inStreamTest = new TestInStream();
    + inStreamTest.testUncompressed();
    + inStreamTest.testCompressed();
    + inStreamTest.testCorruptStream();
    + TestRunLengthByteReader rleByte = new TestRunLengthByteReader();
    + rleByte.testUncompressedSeek();
    + rleByte.testCompressedSeek();
    + rleByte.testSkips();
    + TestRunLengthIntegerReader rleInt = new TestRunLengthIntegerReader();
    + rleInt.testUncompressedSeek();
    + rleInt.testCompressedSeek();
    + rleInt.testSkips();
    + TestBitFieldReader bit = new TestBitFieldReader();
    + bit.testUncompressedSeek();
    + bit.testCompressedSeek();
    + bit.testBiggerItems();
    + bit.testSkips();
    + TestOrcFile test1 = new TestOrcFile();
    + test1.test1();
    + test1.emptyFile();
    + test1.metaData();
    + test1.testUnionAndTimestamp();
    + test1.columnProjection();
    + test1.testSnappy();
    + test1.testWithoutIndex();
    + test1.testSeek();
    + TestFileDump test2 = new TestFileDump();
    + test2.testDump();
    + TestStreamName test3 = new TestStreamName();
    + test3.test1();
    + TestInputOutputFormat test4 = new TestInputOutputFormat();
    + test4.testInOutFormat();
    + test4.testMROutput();
    + test4.testEmptyFile();
    + test4.testDefaultTypes();
    + TestOrcStruct test5 = new TestOrcStruct();
    + test5.testStruct();
    + test5.testInspectorFromTypeInfo();
    + test5.testUnion();
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestZlib.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,55 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +
    +import static junit.framework.Assert.assertEquals;
    +import static junit.framework.Assert.fail;
    +
    +public class TestZlib {
    +
    + @Test
    + public void testNoOverflow() throws Exception {
    + ByteBuffer in = ByteBuffer.allocate(10);
    + ByteBuffer out = ByteBuffer.allocate(10);
    + in.put(new byte[]{1,2,3,4,5,6,7,10});
    + in.flip();
    + CompressionCodec codec = new ZlibCodec();
    + assertEquals(false, codec.compress(in, out, null));
    + }
    +
    + @Test
    + public void testCorrupt() throws Exception {
    + ByteBuffer buf = ByteBuffer.allocate(1000);
    + buf.put(new byte[]{127,-128,0,99,98,-1});
    + buf.flip();
    + CompressionCodec codec = new ZlibCodec();
    + ByteBuffer out = ByteBuffer.allocate(1000);
    + try {
    + codec.decompress(buf, out);
    + fail();
    + } catch (IOException ioe) {
    + // EXPECTED
    + }
    + }
    +}
    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,1415 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import com.google.protobuf.ByteString;
    +import com.google.protobuf.CodedOutputStream;
    +import org.apache.hadoop.fs.FSDataOutputStream;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructField;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
    +import org.apache.hadoop.io.BytesWritable;
    +
    +import java.io.IOException;
    +import java.io.OutputStream;
    +import java.nio.ByteBuffer;
    +import java.sql.Timestamp;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.TreeMap;
    +
    +/**
     + * An ORC file writer. The file is divided into stripes, which are the natural
    + * unit of work when reading. Each stripe is buffered in memory until the
    + * memory reaches the stripe size and then it is written out broken down by
    + * columns. Each column is written by a TreeWriter that is specific to that
    + * type of column. TreeWriters may have children TreeWriters that handle the
    + * sub-types. Each of the TreeWriters writes the column's data as a set of
    + * streams.
    + */
    +class WriterImpl implements Writer {
    +
    + private static final int HDFS_BUFFER_SIZE = 256 * 1024;
    + private static final int MIN_ROW_INDEX_STRIDE = 1000;
    +
    + private final FileSystem fs;
    + private final Path path;
    + private final long stripeSize;
    + private final int rowIndexStride;
    + private final CompressionKind compress;
    + private final CompressionCodec codec;
    + private final int bufferSize;
    + // the streams that make up the current stripe
    + private final Map<StreamName, BufferedStream> streams =
    + new TreeMap<StreamName, BufferedStream>();
    +
    + private FSDataOutputStream rawWriter = null;
    + // the compressed metadata information outStream
    + private OutStream writer = null;
    + // a protobuf outStream around streamFactory
    + private CodedOutputStream protobufWriter = null;
    + private long headerLength;
    + private int columnCount;
    + private long rowCount = 0;
    + private long rowsInStripe = 0;
    + private int rowsInIndex = 0;
    + private final List<OrcProto.StripeInformation> stripes =
    + new ArrayList<OrcProto.StripeInformation>();
    + private final Map<String, ByteString> userMetadata =
    + new TreeMap<String, ByteString>();
    + private final StreamFactory streamFactory = new StreamFactory();
    + private final TreeWriter treeWriter;
    + private final OrcProto.RowIndex.Builder rowIndex =
    + OrcProto.RowIndex.newBuilder();
    + private final boolean buildIndex;
    +
    + WriterImpl(FileSystem fs,
    + Path path,
    + ObjectInspector inspector,
    + long stripeSize,
    + CompressionKind compress,
    + int bufferSize,
    + int rowIndexStride) throws IOException {
    + this.fs = fs;
    + this.path = path;
    + this.stripeSize = stripeSize;
    + this.compress = compress;
    + this.bufferSize = bufferSize;
    + this.rowIndexStride = rowIndexStride;
    + buildIndex = rowIndexStride > 0;
    + codec = createCodec(compress);
    + treeWriter = createTreeWriter(inspector, streamFactory, false);
    + if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
    + throw new IllegalArgumentException("Row stride must be at least " +
    + MIN_ROW_INDEX_STRIDE);
    + }
    + }
    +
    + static CompressionCodec createCodec(CompressionKind kind) {
    + switch (kind) {
    + case NONE:
    + return null;
    + case ZLIB:
    + return new ZlibCodec();
    + case SNAPPY:
    + return new SnappyCodec();
    + case LZO:
    + try {
    + Class<? extends CompressionCodec> lzo =
    + (Class<? extends CompressionCodec>)
    + Class.forName("org.apache.hadoop.hive.ql.io.orc.LzoCodec");
    + return lzo.newInstance();
    + } catch (ClassNotFoundException e) {
    + throw new IllegalArgumentException("LZO is not available.", e);
    + } catch (InstantiationException e) {
    + throw new IllegalArgumentException("Problem initializing LZO", e);
    + } catch (IllegalAccessException e) {
    + throw new IllegalArgumentException("Insufficient access to LZO", e);
    + }
    + default:
    + throw new IllegalArgumentException("Unknown compression codec: " +
    + kind);
    + }
    + }
    +
    + /**
    + * This class is used to hold the contents of streams as they are buffered.
    + * The TreeWriters write to the outStream and the codec compresses the
    + * data as buffers fill up and stores them in the output list. When the
    + * stripe is being written, the whole stream is written to the file.
    + */
    + private class BufferedStream implements OutStream.OutputReceiver {
    + private final OutStream outStream;
    + private final List<ByteBuffer> output = new ArrayList<ByteBuffer>();
    +
    + BufferedStream(String name, int bufferSize,
    + CompressionCodec codec) throws IOException {
    + outStream = new OutStream(name, bufferSize, codec, this);
    + }
    +
    + /**
    + * Receive a buffer from the compression codec.
    + * @param buffer the buffer to save
    + * @throws IOException
    + */
    + @Override
    + public void output(ByteBuffer buffer) {
    + output.add(buffer);
    + }
    +
    + /**
    + * Flush the stream to the codec.
    + * @throws IOException
    + */
    + public void flush() throws IOException {
    + outStream.flush();
    + }
    +
    + /**
    + * Clear all of the buffers.
    + * @throws IOException
    + */
    + public void clear() throws IOException {
    + outStream.clear();
    + output.clear();
    + }
    +
    + /**
    + * Write the saved compressed buffers to the OutputStream.
    + * @param out the stream to write to
    + * @throws IOException
    + */
    + void spillTo(OutputStream out) throws IOException {
    + for(ByteBuffer buffer: output) {
    + out.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
    + buffer.remaining());
    + }
    + }
    +
    + /**
    + * Get the size of compressed and uncompressed data in the stream's buffers.
    + * @return the number of bytes in the buffers.
    + */
    + long getSize() {
    + return outStream.getSize();
    + }
    + }
    +
    + /**
    + * An output receiver that writes the ByteBuffers to the output stream
    + * as they are received.
    + */
    + private class DirectStream implements OutStream.OutputReceiver {
    + private final FSDataOutputStream output;
    +
    + DirectStream(FSDataOutputStream output) {
    + this.output = output;
    + }
    +
    + @Override
    + public void output(ByteBuffer buffer) throws IOException {
    + output.write(buffer.array(), buffer.arrayOffset() + buffer.position(),
    + buffer.remaining());
    + }
    + }
    +
    + private static class RowIndexPositionRecorder implements PositionRecorder {
    + private final OrcProto.RowIndexEntry.Builder builder;
    +
    + RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) {
    + this.builder = builder;
    + }
    +
    + @Override
    + public void addPosition(long position) {
    + builder.addPositions(position);
    + }
    + }
    +
    + /**
    + * Interface from the Writer to the TreeWriters. This limits the visibility
    + * that the TreeWriters have into the Writer.
    + */
    + private class StreamFactory {
    + /**
    + * Create a stream to store part of a column.
    + * @param column the column id for the stream
    + * @param kind the kind of stream
    + * @return The output outStream that the section needs to be written to.
    + * @throws IOException
    + */
    + public PositionedOutputStream createStream(int column,
    + OrcProto.Stream.Kind kind
    + ) throws IOException {
    + StreamName name = new StreamName(column, kind);
    + BufferedStream result = streams.get(name);
    + if (result == null) {
    + result = new BufferedStream(name.toString(), bufferSize, codec);
    + streams.put(name, result);
    + }
    + return result.outStream;
    + }
    +
    + /**
    + * Get the next column id.
    + * @return a number from 0 to the number of columns - 1
    + */
    + public int getNextColumnId() {
    + return columnCount++;
    + }
    +
    + /**
    + * Get the stride rate of the row index.
    + */
    + public int getRowIndexStride() {
    + return rowIndexStride;
    + }
    +
    + /**
     + * Should the writer build the row index?
     + * @return true if the row index is being built
    + */
    + public boolean buildIndex() {
    + return buildIndex;
    + }
    + }
    +
    + /**
    + * The parent class of all of the writers for each column. Each column
    + * is written by an instance of this class. The compound types (struct,
    + * list, map, and union) have children tree writers that write the children
    + * types.
    + */
    + private abstract static class TreeWriter {
    + protected final int id;
    + protected final ObjectInspector inspector;
    + private final BitFieldWriter isPresent;
    + protected final ColumnStatisticsImpl indexStatistics;
    + private final ColumnStatisticsImpl fileStatistics;
    + protected TreeWriter[] childrenWriters;
    + protected final RowIndexPositionRecorder rowIndexPosition;
    + private final OrcProto.RowIndex.Builder rowIndex;
    + private final OrcProto.RowIndexEntry.Builder rowIndexEntry;
    + private final PositionedOutputStream rowIndexStream;
    +
    + /**
    + * Create a tree writer
    + * @param columnId the column id of the column to write
    + * @param inspector the object inspector to use
    + * @param streamFactory limited access to the Writer's data.
    + * @param nullable can the value be null?
    + * @throws IOException
    + */
    + TreeWriter(int columnId, ObjectInspector inspector,
    + StreamFactory streamFactory,
    + boolean nullable) throws IOException {
    + this.id = columnId;
    + this.inspector = inspector;
    + if (nullable) {
    + isPresent = new BitFieldWriter(streamFactory.createStream(id,
    + OrcProto.Stream.Kind.PRESENT), 1);
    + } else {
    + isPresent = null;
    + }
    + indexStatistics = ColumnStatisticsImpl.create(inspector);
    + fileStatistics = ColumnStatisticsImpl.create(inspector);
    + childrenWriters = new TreeWriter[0];
    + rowIndex = OrcProto.RowIndex.newBuilder();
    + rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
    + rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry);
    + if (streamFactory.buildIndex()) {
    + rowIndexStream = streamFactory.createStream(id,
    + OrcProto.Stream.Kind.ROW_INDEX);
    + } else {
    + rowIndexStream = null;
    + }
    + }
    +
    + protected OrcProto.RowIndex.Builder getRowIndex() {
    + return rowIndex;
    + }
    +
    + protected ColumnStatisticsImpl getFileStatistics() {
    + return fileStatistics;
    + }
    +
    + protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() {
    + return rowIndexEntry;
    + }
    +
    + /**
    + * Add a new value to the column.
    + * @param obj
    + * @throws IOException
    + */
    + void write(Object obj) throws IOException {
    + if (obj != null) {
    + indexStatistics.increment();
    + }
    + if (isPresent != null) {
    + isPresent.write(obj == null ? 0 : 1);
    + }
    + }
    +
    + /**
    + * Write the stripe out to the file.
    + * @param builder the stripe footer that contains the information about the
    + * layout of the stripe. The TreeWriter is required to update
    + * the footer with its information.
    + * @param requiredIndexEntries the number of index entries that are
     + * required; this is used to verify that the
     + * row index is well formed.
    + * @throws IOException
    + */
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + if (isPresent != null) {
    + isPresent.flush();
    + }
    + builder.addColumns(getEncoding());
    + if (rowIndexStream != null) {
    + if (rowIndex.getEntryCount() != requiredIndexEntries) {
    + throw new IllegalArgumentException("Column has wrong number of " +
    + "index entries found: " + rowIndexEntry + " expected: " +
    + requiredIndexEntries);
    + }
    + rowIndex.build().writeTo(rowIndexStream);
    + rowIndexStream.flush();
    + }
    + rowIndex.clear();
    + rowIndexEntry.clear();
    + }
    +
    + TreeWriter[] getChildrenWriters() {
    + return childrenWriters;
    + }
    +
    + /**
    + * Get the encoding for this column.
    + * @return the information about the encoding of this column
    + */
    + OrcProto.ColumnEncoding getEncoding() {
    + return OrcProto.ColumnEncoding.newBuilder().setKind(
    + OrcProto.ColumnEncoding.Kind.DIRECT).build();
    + }
    +
    + /**
    + * Create a row index entry with the previous location and the current
    + * index statistics. Also merges the index statistics into the file
    + * statistics before they are cleared. Finally, it records the start of the
    + * next index and ensures all of the children columns also create an entry.
    + * @throws IOException
    + */
    + void createRowIndexEntry() throws IOException {
    + fileStatistics.merge(indexStatistics);
    + rowIndexEntry.setStatistics(indexStatistics.serialize());
    + indexStatistics.reset();
    + rowIndex.addEntry(rowIndexEntry);
    + rowIndexEntry.clear();
    + recordPosition(rowIndexPosition);
    + for(TreeWriter child: childrenWriters) {
    + child.createRowIndexEntry();
    + }
    + }
    +
    + /**
    + * Record the current position in each of this column's streams.
    + * @param recorder where should the locations be recorded
    + * @throws IOException
    + */
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + if (isPresent != null) {
    + isPresent.getPosition(recorder);
    + }
    + }
    +
    + /**
    + * Estimate how much memory the writer is consuming excluding the streams.
    + * @return the number of bytes.
    + */
    + long estimateMemory() {
    + long result = 0;
    + for (TreeWriter child: childrenWriters) {
    + result += child.estimateMemory();
    + }
    + return result;
    + }
    + }
    +
    + private static class BooleanTreeWriter extends TreeWriter {
    + private final BitFieldWriter writer;
    +
    + BooleanTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + PositionedOutputStream out = writer.createStream(id,
    + OrcProto.Stream.Kind.DATA);
    + this.writer = new BitFieldWriter(out, 1);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + boolean val = ((BooleanObjectInspector) inspector).get(obj);
    + indexStatistics.updateBoolean(val);
    + writer.write(val ? 1 : 0);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + writer.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + writer.getPosition(recorder);
    + }
    + }
    +
    + private static class ByteTreeWriter extends TreeWriter {
    + private final RunLengthByteWriter writer;
    +
    + ByteTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + this.writer = new RunLengthByteWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.DATA));
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + byte val = ((ByteObjectInspector) inspector).get(obj);
    + indexStatistics.updateInteger(val);
    + writer.write(val);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + writer.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + writer.getPosition(recorder);
    + }
    + }
    +
    + private static class IntegerTreeWriter extends TreeWriter {
    + private final RunLengthIntegerWriter writer;
    + private final ShortObjectInspector shortInspector;
    + private final IntObjectInspector intInspector;
    + private final LongObjectInspector longInspector;
    +
    + IntegerTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + PositionedOutputStream out = writer.createStream(id,
    + OrcProto.Stream.Kind.DATA);
    + this.writer = new RunLengthIntegerWriter(out, true);
    + if (inspector instanceof IntObjectInspector) {
    + intInspector = (IntObjectInspector) inspector;
    + shortInspector = null;
    + longInspector = null;
    + } else {
    + intInspector = null;
    + if (inspector instanceof LongObjectInspector) {
    + longInspector = (LongObjectInspector) inspector;
    + shortInspector = null;
    + } else {
    + shortInspector = (ShortObjectInspector) inspector;
    + longInspector = null;
    + }
    + }
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + long val;
    + if (intInspector != null) {
    + val = intInspector.get(obj);
    + } else if (longInspector != null) {
    + val = longInspector.get(obj);
    + } else {
    + val = shortInspector.get(obj);
    + }
    + indexStatistics.updateInteger(val);
    + writer.write(val);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + writer.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + writer.getPosition(recorder);
    + }
    + }
    +
    + private static class FloatTreeWriter extends TreeWriter {
    + private final PositionedOutputStream stream;
    +
    + FloatTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + this.stream = writer.createStream(id,
    + OrcProto.Stream.Kind.DATA);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + float val = ((FloatObjectInspector) inspector).get(obj);
    + indexStatistics.updateDouble(val);
    + SerializationUtils.writeFloat(stream, val);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + stream.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + stream.getPosition(recorder);
    + }
    + }
    +
    + private static class DoubleTreeWriter extends TreeWriter {
    + private final PositionedOutputStream stream;
    +
    + DoubleTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + this.stream = writer.createStream(id,
    + OrcProto.Stream.Kind.DATA);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + double val = ((DoubleObjectInspector) inspector).get(obj);
    + indexStatistics.updateDouble(val);
    + SerializationUtils.writeDouble(stream, val);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + stream.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + stream.getPosition(recorder);
    + }
    + }
    +
    + private static class StringTreeWriter extends TreeWriter {
    + private final PositionedOutputStream stringOutput;
    + private final RunLengthIntegerWriter lengthOutput;
    + private final RunLengthIntegerWriter rowOutput;
    + private final RunLengthIntegerWriter countOutput;
    + private final StringRedBlackTree dictionary = new StringRedBlackTree();
    + private final DynamicIntArray rows = new DynamicIntArray();
    + private final List<OrcProto.RowIndexEntry> savedRowIndex =
    + new ArrayList<OrcProto.RowIndexEntry>();
    + private final boolean buildIndex;
    + private final List<Long> rowIndexValueCount = new ArrayList<Long>();
    +
    + StringTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + stringOutput = writer.createStream(id,
    + OrcProto.Stream.Kind.DICTIONARY_DATA);
    + lengthOutput = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.LENGTH), false);
    + rowOutput = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.DATA), false);
    + if (writer.buildIndex()) {
    + countOutput = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.DICTIONARY_COUNT), false);
    + } else {
    + countOutput = null;
    + }
    + recordPosition(rowIndexPosition);
    + rowIndexValueCount.add(0L);
    + buildIndex = writer.buildIndex();
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + String val = ((StringObjectInspector) inspector)
    + .getPrimitiveJavaObject(obj);
    + rows.add(dictionary.add(val));
    + indexStatistics.updateString(val);
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
     + // Traverse the red-black tree, writing out the bytes and lengths, and
     + // building the map from the original order to the final sorted order.
    + final int[] dumpOrder = new int[dictionary.size()];
    + dictionary.visit(new StringRedBlackTree.Visitor() {
    + private int currentId = 0;
    + @Override
    + public void visit(StringRedBlackTree.VisitorContext context
    + ) throws IOException {
    + context.writeBytes(stringOutput);
    + lengthOutput.write(context.getLength());
    + dumpOrder[context.getOriginalPosition()] = currentId++;
    + if (countOutput != null) {
    + countOutput.write(context.getCount());
    + }
    + }
    + });
    + int length = rows.size();
    + int rowIndexEntry = 0;
    + OrcProto.RowIndex.Builder rowIndex = getRowIndex();
    + // need to build the first index entry out here, to handle the case of
    + // not having any values.
    + if (buildIndex) {
    + while (0 == rowIndexValueCount.get(rowIndexEntry) &&
    + rowIndexEntry < savedRowIndex.size()) {
    + OrcProto.RowIndexEntry.Builder base =
    + savedRowIndex.get(rowIndexEntry++).toBuilder();
    + rowOutput.getPosition(new RowIndexPositionRecorder(base));
    + rowIndex.addEntry(base.build());
    + }
    + }
    + // write the values translated into the dump order.
    + for(int i = 0; i < length; ++i) {
    + // now that we are writing out the row values, we can finalize the
    + // row index
    + if (buildIndex) {
    + while (i == rowIndexValueCount.get(rowIndexEntry) &&
    + rowIndexEntry < savedRowIndex.size()) {
    + OrcProto.RowIndexEntry.Builder base =
    + savedRowIndex.get(rowIndexEntry++).toBuilder();
    + rowOutput.getPosition(new RowIndexPositionRecorder(base));
    + rowIndex.addEntry(base.build());
    + }
    + }
    + rowOutput.write(dumpOrder[rows.get(i)]);
    + }
    + // we need to build the rowindex before calling super, since it
    + // writes it out.
    + super.writeStripe(builder, requiredIndexEntries);
    + stringOutput.flush();
    + lengthOutput.flush();
    + rowOutput.flush();
    + if (countOutput != null) {
    + countOutput.flush();
    + }
    + // reset all of the fields to be ready for the next stripe.
    + dictionary.clear();
    + rows.clear();
    + savedRowIndex.clear();
    + rowIndexValueCount.clear();
    + recordPosition(rowIndexPosition);
    + rowIndexValueCount.add(0L);
    + }
    +
    + @Override
    + OrcProto.ColumnEncoding getEncoding() {
    + return OrcProto.ColumnEncoding.newBuilder().setKind(
    + OrcProto.ColumnEncoding.Kind.DICTIONARY).
    + setDictionarySize(dictionary.size()).build();
    + }
    +
    + /**
    + * This method doesn't call the super method, because unlike most of the
    + * other TreeWriters, this one can't record the position in the streams
    + * until the stripe is being flushed. Therefore it saves all of the entries
    + * and augments them with the final information as the stripe is written.
    + * @throws IOException
    + */
    + void createRowIndexEntry() throws IOException {
    + getFileStatistics().merge(indexStatistics);
    + OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
    + rowIndexEntry.setStatistics(indexStatistics.serialize());
    + indexStatistics.reset();
    + savedRowIndex.add(rowIndexEntry.build());
    + rowIndexEntry.clear();
    + recordPosition(rowIndexPosition);
    + rowIndexValueCount.add(Long.valueOf(rows.size()));
    + }
    +
    + @Override
    + long estimateMemory() {
    + return rows.size() * 4 + dictionary.getByteSize();
    + }
    + }
    +
    + private static class BinaryTreeWriter extends TreeWriter {
    + private final PositionedOutputStream stream;
    + private final RunLengthIntegerWriter length;
    +
    + BinaryTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + this.stream = writer.createStream(id,
    + OrcProto.Stream.Kind.DATA);
    + this.length = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.LENGTH), false);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + BytesWritable val =
    + ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
    + stream.write(val.getBytes(), 0, val.getLength());
    + length.write(val.getLength());
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + stream.flush();
    + length.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + stream.getPosition(recorder);
    + length.getPosition(recorder);
    + }
    + }
    +
    + static final int MILLIS_PER_SECOND = 1000;
    + static final long BASE_TIMESTAMP =
    + Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
    +
    + private static class TimestampTreeWriter extends TreeWriter {
    + private final RunLengthIntegerWriter seconds;
    + private final RunLengthIntegerWriter nanos;
    +
    + TimestampTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + this.seconds = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.DATA), true);
    + this.nanos = new RunLengthIntegerWriter(writer.createStream(id,
    + OrcProto.Stream.Kind.NANO_DATA), false);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + Timestamp val =
    + ((TimestampObjectInspector) inspector).
    + getPrimitiveJavaObject(obj);
    + seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
    + nanos.write(formatNanos(val.getNanos()));
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + seconds.flush();
    + nanos.flush();
    + recordPosition(rowIndexPosition);
    + }
    +
    + private static long formatNanos(int nanos) {
    + if (nanos == 0) {
    + return 0;
    + } else if (nanos % 100 != 0) {
    + return ((long) nanos) << 3;
    + } else {
    + nanos /= 100;
    + int trailingZeros = 1;
    + while (nanos % 10 == 0 && trailingZeros < 7) {
    + nanos /= 10;
    + trailingZeros += 1;
    + }
    + return ((long) nanos) << 3 | trailingZeros;
    + }
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + seconds.getPosition(recorder);
    + nanos.getPosition(recorder);
    + }
    + }
    +
    + private static class StructTreeWriter extends TreeWriter {
    + private final List<? extends StructField> fields;
    + StructTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + StructObjectInspector structObjectInspector =
    + (StructObjectInspector) inspector;
    + fields = structObjectInspector.getAllStructFieldRefs();
    + childrenWriters = new TreeWriter[fields.size()];
    + for(int i=0; i < childrenWriters.length; ++i) {
    + childrenWriters[i] = createTreeWriter(
    + fields.get(i).getFieldObjectInspector(), writer, true);
    + }
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + StructObjectInspector insp = (StructObjectInspector) inspector;
    + for(int i = 0; i < fields.size(); ++i) {
    + StructField field = fields.get(i);
    + TreeWriter writer = childrenWriters[i];
    + writer.write(insp.getStructFieldData(obj, field));
    + }
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + for(TreeWriter child: childrenWriters) {
    + child.writeStripe(builder, requiredIndexEntries);
    + }
    + recordPosition(rowIndexPosition);
    + }
    + }
    +
    + private static class ListTreeWriter extends TreeWriter {
    + private final RunLengthIntegerWriter lengths;
    +
    + ListTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + ListObjectInspector listObjectInspector = (ListObjectInspector) inspector;
    + childrenWriters = new TreeWriter[1];
    + childrenWriters[0] =
    + createTreeWriter(listObjectInspector.getListElementObjectInspector(),
    + writer, true);
    + lengths =
    + new RunLengthIntegerWriter(writer.createStream(columnId,
    + OrcProto.Stream.Kind.LENGTH), false);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + ListObjectInspector insp = (ListObjectInspector) inspector;
    + int len = insp.getListLength(obj);
    + lengths.write(len);
    + for(int i=0; i < len; ++i) {
    + childrenWriters[0].write(insp.getListElement(obj, i));
    + }
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + lengths.flush();
    + for(TreeWriter child: childrenWriters) {
    + child.writeStripe(builder, requiredIndexEntries);
    + }
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + lengths.getPosition(recorder);
    + }
    + }
    +
    + private static class MapTreeWriter extends TreeWriter {
    + private final RunLengthIntegerWriter lengths;
    +
    + MapTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + MapObjectInspector insp = (MapObjectInspector) inspector;
    + childrenWriters = new TreeWriter[2];
    + childrenWriters[0] =
    + createTreeWriter(insp.getMapKeyObjectInspector(), writer, true);
    + childrenWriters[1] =
    + createTreeWriter(insp.getMapValueObjectInspector(), writer, true);
    + lengths =
    + new RunLengthIntegerWriter(writer.createStream(columnId,
    + OrcProto.Stream.Kind.LENGTH), false);
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + MapObjectInspector insp = (MapObjectInspector) inspector;
    + int len = insp.getMapSize(obj);
    + lengths.write(len);
    + // this sucks, but it will have to do until we can get a better
    + // accessor in the MapObjectInspector.
    + Map<?, ?> valueMap = insp.getMap(obj);
    + for(Map.Entry<?, ?> entry: valueMap.entrySet()) {
    + childrenWriters[0].write(entry.getKey());
    + childrenWriters[1].write(entry.getValue());
    + }
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + lengths.flush();
    + for(TreeWriter child: childrenWriters) {
    + child.writeStripe(builder, requiredIndexEntries);
    + }
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + lengths.getPosition(recorder);
    + }
    + }
    +
    + private static class UnionTreeWriter extends TreeWriter {
    + private final RunLengthByteWriter tags;
    +
    + UnionTreeWriter(int columnId,
    + ObjectInspector inspector,
    + StreamFactory writer,
    + boolean nullable) throws IOException {
    + super(columnId, inspector, writer, nullable);
    + UnionObjectInspector insp = (UnionObjectInspector) inspector;
    + List<ObjectInspector> choices = insp.getObjectInspectors();
    + childrenWriters = new TreeWriter[choices.size()];
    + for(int i=0; i < childrenWriters.length; ++i) {
    + childrenWriters[i] = createTreeWriter(choices.get(i), writer, true);
    + }
    + tags =
    + new RunLengthByteWriter(writer.createStream(columnId,
    + OrcProto.Stream.Kind.DATA));
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void write(Object obj) throws IOException {
    + super.write(obj);
    + if (obj != null) {
    + UnionObjectInspector insp = (UnionObjectInspector) inspector;
    + byte tag = insp.getTag(obj);
    + tags.write(tag);
    + childrenWriters[tag].write(insp.getField(obj));
    + }
    + }
    +
    + @Override
    + void writeStripe(OrcProto.StripeFooter.Builder builder,
    + int requiredIndexEntries) throws IOException {
    + super.writeStripe(builder, requiredIndexEntries);
    + tags.flush();
    + for(TreeWriter child: childrenWriters) {
    + child.writeStripe(builder, requiredIndexEntries);
    + }
    + recordPosition(rowIndexPosition);
    + }
    +
    + @Override
    + void recordPosition(PositionRecorder recorder) throws IOException {
    + super.recordPosition(recorder);
    + tags.getPosition(recorder);
    + }
    + }
    +
    + private static TreeWriter createTreeWriter(ObjectInspector inspector,
    + StreamFactory streamFactory,
    + boolean nullable
    + ) throws IOException {
    + switch (inspector.getCategory()) {
    + case PRIMITIVE:
    + switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
    + case BOOLEAN:
    + return new BooleanTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case BYTE:
    + return new ByteTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case SHORT:
    + case INT:
    + case LONG:
    + return new IntegerTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case FLOAT:
    + return new FloatTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case DOUBLE:
    + return new DoubleTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case STRING:
    + return new StringTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case BINARY:
    + return new BinaryTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + case TIMESTAMP:
    + return new TimestampTreeWriter(streamFactory.getNextColumnId(),
    + inspector, streamFactory, nullable);
    + default:
    + throw new IllegalArgumentException("Bad primitive category " +
    + ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
    + }
    + case STRUCT:
    + return new StructTreeWriter(streamFactory.getNextColumnId(), inspector,
    + streamFactory, nullable);
    + case MAP:
    + return new MapTreeWriter(streamFactory.getNextColumnId(), inspector,
    + streamFactory, nullable);
    + case LIST:
    + return new ListTreeWriter(streamFactory.getNextColumnId(), inspector,
    + streamFactory, nullable);
    + case UNION:
    + return new UnionTreeWriter(streamFactory.getNextColumnId(), inspector,
    + streamFactory, nullable);
    + default:
    + throw new IllegalArgumentException("Bad category: " +
    + inspector.getCategory());
    + }
    + }
    +
    + private static void writeTypes(OrcProto.Footer.Builder builder,
    + TreeWriter treeWriter) {
    + OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
    + switch (treeWriter.inspector.getCategory()) {
    + case PRIMITIVE:
    + switch (((PrimitiveObjectInspector) treeWriter.inspector).
    + getPrimitiveCategory()) {
    + case BOOLEAN:
    + type.setKind(OrcProto.Type.Kind.BOOLEAN);
    + break;
    + case BYTE:
    + type.setKind(OrcProto.Type.Kind.BYTE);
    + break;
    + case SHORT:
    + type.setKind(OrcProto.Type.Kind.SHORT);
    + break;
    + case INT:
    + type.setKind(OrcProto.Type.Kind.INT);
    + break;
    + case LONG:
    + type.setKind(OrcProto.Type.Kind.LONG);
    + break;
    + case FLOAT:
    + type.setKind(OrcProto.Type.Kind.FLOAT);
    + break;
    + case DOUBLE:
    + type.setKind(OrcProto.Type.Kind.DOUBLE);
    + break;
    + case STRING:
    + type.setKind(OrcProto.Type.Kind.STRING);
    + break;
    + case BINARY:
    + type.setKind(OrcProto.Type.Kind.BINARY);
    + break;
    + case TIMESTAMP:
    + type.setKind(OrcProto.Type.Kind.TIMESTAMP);
    + break;
    + default:
    + throw new IllegalArgumentException("Unknown primitive category: " +
    + ((PrimitiveObjectInspector) treeWriter.inspector).
    + getPrimitiveCategory());
    + }
    + break;
    + case LIST:
    + type.setKind(OrcProto.Type.Kind.LIST);
    + type.addSubtypes(treeWriter.childrenWriters[0].id);
    + break;
    + case MAP:
    + type.setKind(OrcProto.Type.Kind.MAP);
    + type.addSubtypes(treeWriter.childrenWriters[0].id);
    + type.addSubtypes(treeWriter.childrenWriters[1].id);
    + break;
    + case STRUCT:
    + type.setKind(OrcProto.Type.Kind.STRUCT);
    + for(TreeWriter child: treeWriter.childrenWriters) {
    + type.addSubtypes(child.id);
    + }
    + for(StructField field: ((StructTreeWriter) treeWriter).fields) {
    + type.addFieldNames(field.getFieldName());
    + }
    + break;
    + case UNION:
    + type.setKind(OrcProto.Type.Kind.UNION);
    + for(TreeWriter child: treeWriter.childrenWriters) {
    + type.addSubtypes(child.id);
    + }
    + break;
    + default:
    + throw new IllegalArgumentException("Unknown category: " +
    + treeWriter.inspector.getCategory());
    + }
    + builder.addTypes(type);
    + for(TreeWriter child: treeWriter.childrenWriters) {
    + writeTypes(builder, child);
    + }
    + }
    +
    + private void ensureWriter() throws IOException {
    + if (rawWriter == null) {
    + rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
    + fs.getDefaultReplication(),
    + Math.min(stripeSize * 2L, Integer.MAX_VALUE));
    + rawWriter.writeBytes(OrcFile.MAGIC);
    + headerLength = rawWriter.getPos();
    + writer = new OutStream("metadata", bufferSize, codec,
    + new DirectStream(rawWriter));
    + protobufWriter = CodedOutputStream.newInstance(writer);
    + }
    + }
    +
    + private void createRowIndexEntry() throws IOException {
    + treeWriter.createRowIndexEntry();
    + rowsInIndex = 0;
    + }
    +
    + private void flushStripe() throws IOException {
    + ensureWriter();
    + if (buildIndex && rowsInIndex != 0) {
    + createRowIndexEntry();
    + }
    + if (rowsInStripe != 0) {
    + int requiredIndexEntries = rowIndexStride == 0 ? 0 :
    + (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
    + OrcProto.StripeFooter.Builder builder =
    + OrcProto.StripeFooter.newBuilder();
    + treeWriter.writeStripe(builder, requiredIndexEntries);
    + long start = rawWriter.getPos();
    + long section = start;
    + long indexEnd = start;
    + for(Map.Entry<StreamName, BufferedStream> pair: streams.entrySet()) {
    + BufferedStream stream = pair.getValue();
    + stream.flush();
    + stream.spillTo(rawWriter);
    + stream.clear();
    + long end = rawWriter.getPos();
    + StreamName name = pair.getKey();
    + builder.addStreams(OrcProto.Stream.newBuilder()
    + .setColumn(name.getColumn())
    + .setKind(name.getKind())
    + .setLength(end-section));
    + section = end;
    + if (StreamName.Area.INDEX == name.getArea()) {
    + indexEnd = end;
    + }
    + }
    + builder.build().writeTo(protobufWriter);
    + protobufWriter.flush();
    + writer.flush();
    + long end = rawWriter.getPos();
    + OrcProto.StripeInformation dirEntry =
    + OrcProto.StripeInformation.newBuilder()
    + .setOffset(start)
    + .setIndexLength(indexEnd - start)
    + .setDataLength(section - indexEnd)
    + .setNumberOfRows(rowsInStripe)
    + .setFooterLength(end - section).build();
    + stripes.add(dirEntry);
    + rowCount += rowsInStripe;
    + rowsInStripe = 0;
    + }
    + }
    +
    + private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
    + switch (kind) {
    + case NONE: return OrcProto.CompressionKind.NONE;
    + case ZLIB: return OrcProto.CompressionKind.ZLIB;
    + case SNAPPY: return OrcProto.CompressionKind.SNAPPY;
    + case LZO: return OrcProto.CompressionKind.LZO;
    + default:
    + throw new IllegalArgumentException("Unknown compression " + kind);
    + }
    + }
    +
    + private void writeFileStatistics(OrcProto.Footer.Builder builder,
    + TreeWriter writer) throws IOException {
    + builder.addStatistics(writer.fileStatistics.serialize());
    + for(TreeWriter child: writer.getChildrenWriters()) {
    + writeFileStatistics(builder, child);
    + }
    + }
    +
    + private int writeFooter(long bodyLength) throws IOException {
    + ensureWriter();
    + OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder();
    + builder.setContentLength(bodyLength);
    + builder.setHeaderLength(headerLength);
    + builder.setNumberOfRows(rowCount);
    + builder.setRowIndexStride(rowIndexStride);
    + // serialize the types
    + writeTypes(builder, treeWriter);
    + // add the stripe information
    + for(OrcProto.StripeInformation stripe: stripes) {
    + builder.addStripes(stripe);
    + }
    + // add the column statistics
    + writeFileStatistics(builder, treeWriter);
    + // add all of the user metadata
    + for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) {
    + builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
    + .setName(entry.getKey()).setValue(entry.getValue()));
    + }
    + long startPosn = rawWriter.getPos();
    + builder.build().writeTo(protobufWriter);
    + protobufWriter.flush();
    + writer.flush();
    + return (int) (rawWriter.getPos() - startPosn);
    + }
    +
    + private int writePostScript(int footerLength) throws IOException {
    + OrcProto.PostScript.Builder builder =
    + OrcProto.PostScript.newBuilder()
    + .setCompression(writeCompressionKind(compress))
    + .setFooterLength(footerLength);
    + if (compress != CompressionKind.NONE) {
    + builder.setCompressionBlockSize(bufferSize);
    + }
    + OrcProto.PostScript ps = builder.build();
    + // need to write this uncompressed
    + long startPosn = rawWriter.getPos();
    + ps.writeTo(rawWriter);
    + long length = rawWriter.getPos() - startPosn;
    + if (length > 255) {
    + throw new IllegalArgumentException("PostScript too large at " + length);
    + }
    + return (int) length;
    + }
    +
    + private long estimateStripeSize() {
    + long result = 0;
    + for(BufferedStream stream: streams.values()) {
    + result += stream.getSize();
    + }
    + result += treeWriter.estimateMemory();
    + return result;
    + }
    +
    + @Override
    + public void addUserMetadata(String name, ByteBuffer value) {
    + userMetadata.put(name, ByteString.copyFrom(value));
    + }
    +
    + @Override
    + public void addRow(Object row) throws IOException {
    + treeWriter.write(row);
    + rowsInStripe += 1;
    + if (buildIndex) {
    + rowsInIndex += 1;
    +
    + if (rowsInIndex >= rowIndexStride) {
    + createRowIndexEntry();
    + }
    + }
    + // once every 1000 rows, check the size to see if we should spill
    + if (rowsInStripe % 1000 == 0 && estimateStripeSize() > stripeSize) {
    + flushStripe();
    + }
    + }
    +
    + @Override
    + public void close() throws IOException {
    + flushStripe();
    + int footerLength = writeFooter(rawWriter.getPos());
    + rawWriter.writeByte(writePostScript(footerLength));
    + rawWriter.close();
    + }
    +}

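     The class comment above describes how WriterImpl buffers each stripe in memory and spills it column by column once the estimated size passes stripeSize, with close() flushing the final stripe and appending the footer and postscript. The following is a minimal usage sketch, not part of the patch: it assumes a helper class in the same package (WriterImpl is package-private), a reflection-based ObjectInspector from Hive's serde2 library, and purely illustrative sizes and paths.

     // WriterSketch.java -- hypothetical driver for the writer above; not part of
     // the patch. Lives in the orc package so it can see the package-private class.
     package org.apache.hadoop.hive.ql.io.orc;

     import org.apache.hadoop.conf.Configuration;
     import org.apache.hadoop.fs.FileSystem;
     import org.apache.hadoop.fs.Path;
     import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
     import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

     public class WriterSketch {
       // a trivial row type; the reflection inspector exposes it as struct<x:int,y:string>
       static class SimpleRow {
         int x;
         String y;
         SimpleRow(int x, String y) { this.x = x; this.y = y; }
       }

       public static void main(String[] args) throws Exception {
         Configuration conf = new Configuration();
         FileSystem fs = FileSystem.getLocal(conf);
         ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
             SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
         // the arguments mirror the WriterImpl constructor shown above:
         // stripe size, compression kind, compression buffer size, row index stride
         Writer writer = new WriterImpl(fs, new Path("/tmp/writer-sketch.orc"), inspector,
             256 * 1024, CompressionKind.ZLIB, 64 * 1024, 1000);
         for (int i = 0; i < 10000; ++i) {
           writer.addRow(new SimpleRow(i, Integer.toString(i)));
         }
         // flushes the last stripe, then writes the file footer and postscript
         writer.close();
       }
     }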
    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ZlibCodec.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,77 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.util.zip.DataFormatException;
    +import java.util.zip.Deflater;
    +import java.util.zip.Inflater;
    +
    +class ZlibCodec implements CompressionCodec {
    +
    + @Override
    + public boolean compress(ByteBuffer in, ByteBuffer out,
    + ByteBuffer overflow) throws IOException {
    + Deflater deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true);
    + int length = in.remaining();
    + deflater.setInput(in.array(), in.arrayOffset() + in.position(), length);
    + deflater.finish();
    + int outSize = 0;
    + int offset = out.arrayOffset() + out.position();
    + while (!deflater.finished() && (length > outSize)) {
    + int size = deflater.deflate(out.array(), offset, out.remaining());
    + out.position(size + out.position());
    + outSize += size;
    + offset += size;
    + // if we run out of space in the out buffer, use the overflow
    + if (out.remaining() == 0) {
    + if (overflow == null) {
    + deflater.end();
    + return false;
    + }
    + out = overflow;
    + offset = out.arrayOffset() + out.position();
    + }
    + }
    + deflater.end();
    + return length > outSize;
    + }
    +
    + @Override
    + public void decompress(ByteBuffer in, ByteBuffer out) throws IOException {
    + Inflater inflater = new Inflater(true);
    + inflater.setInput(in.array(), in.arrayOffset() + in.position(),
    + in.remaining());
    + while (!(inflater.finished() || inflater.needsDictionary() ||
    + inflater.needsInput())) {
    + try {
    + int count = inflater.inflate(out.array(),
    + out.arrayOffset() + out.position(),
    + out.remaining());
    + out.position(count + out.position());
    + } catch (DataFormatException dfe) {
    + throw new IOException("Bad compression data", dfe);
    + }
    + }
    + out.flip();
    + inflater.end();
    + in.position(in.limit());
    + }
    +
    +}

    Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/package-info.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/package-info.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/package-info.java (added)
    +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/package-info.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,70 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +/**
    + * The Optimized Row Columnar (ORC) File Format.
    + *
    + * This format:
    + * <ul>
    + * <li>Decomposes complex column types into primitives</li>
    + * <li>Uses type-specific encoders for each column
    + * <ul>
    + * <li>Dictionary encodings for low cardinality columns</li>
    + * <li>Run length encoding of data</li>
    + * <li>variable length encoding of integers</li>
    + * </ul>
    + * </li>
    + * <li>Divides the file into large stripes</li>
    + * <li>Each stripe includes light-weight indexes that enable the reader to
    + * skip large sets of rows that don't satisfy the filter condition</li>
    + * <li>A file footer that contains meta-information about the file
    + * <ul>
    + * <li>Precise byte range for each stripe</li>
    + * <li>Type information for the file</li>
    + * <li>Any user meta-information</li>
    + * </ul>
    + * </li>
    + * <li>Seek to a given row number is implemented to support secondary indexes</li>
    + * <li>Support for additional generic compression: LZO, SNAPPY, ZLIB.</li>
    + * </ul>
    + *
    + * <p>
    + * <b>Format:</b>
    + * <pre>
    + * {@code
    + * HEADER (3 bytes) "ORC"
    + * STRIPE (0 or more stripes)
    + * FILE-FOOTER
    + * POST SCRIPT
    + * PS LENGTH (1 byte)
    + * }
    + * </pre>
    + * </p>
    + *
    + * <p>
    + * <b>Stripe:</b>
    + * <pre>
    + * {@code
    + * INDEX-STREAM (0 or more)
    + * DATA-STREAM (0 or more)
    + * STRIPE-FOOTER
    + * }
    + * </pre>
    + * </p>
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
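
    The writing entry point for files with this layout is OrcFile.createWriter, exercised by TestFileDump further down in this commit. The sketch below mirrors that test and is not part of the commit; reading the three numeric arguments as stripe size, compression block size, and row index stride is an assumption (only the compression block size of 10000 is echoed by the expected dump output, "Compression size: 10000"), and the class is assumed to sit where Writer, OrcFile, and CompressionKind are visible.

    package org.apache.hadoop.hive.ql.io.orc;   // assumed location, matching the tests

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

    public class OrcWriteSketch {
      static class Row {                         // plain POJO; the reflection inspector maps fields to columns
        int i;
        String s;
        Row(int i, String s) { this.i = i; this.s = s; }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
            Row.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        Writer writer = OrcFile.createWriter(fs, new Path("example.orc"), inspector,
            100000, CompressionKind.ZLIB, 10000, 10000);
        writer.addRow(new Row(1, "hello"));
        writer.addRow(new Row(2, "world"));
        writer.close();                          // flushes the stripe, file footer, and postscript
      }
    }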

    Added: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (added)
    +++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,130 @@
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +message IntegerStatistics {
    + optional sint64 minimum = 1;
    + optional sint64 maximum = 2;
    + optional sint64 sum = 3;
    +}
    +
    +message DoubleStatistics {
    + optional double minimum = 1;
    + optional double maximum = 2;
    + optional double sum = 3;
    +}
    +
    +message StringStatistics {
    + optional string minimum = 1;
    + optional string maximum = 2;
    +}
    +
    +message BucketStatistics {
    + repeated uint64 count = 1 [packed=true];
    +}
    +
    +message ColumnStatistics {
    + optional uint64 numberOfValues = 1;
    + optional IntegerStatistics intStatistics = 2;
    + optional DoubleStatistics doubleStatistics = 3;
    + optional StringStatistics stringStatistics = 4;
    + optional BucketStatistics bucketStatistics = 5;
    +}
    +
    +message RowIndexEntry {
    + repeated uint64 positions = 1 [packed=true];
    + optional ColumnStatistics statistics = 2;
    +}
    +
    +message RowIndex {
    + repeated RowIndexEntry entry = 1;
    +}
    +
    +message Stream {
    + // if you add new index stream kinds, you need to make sure to update
    + // StreamName to ensure it is added to the stripe in the right area
    + enum Kind {
    + PRESENT = 0;
    + DATA = 1;
    + LENGTH = 2;
    + DICTIONARY_DATA = 3;
    + DICTIONARY_COUNT = 4;
    + NANO_DATA = 5;
    + ROW_INDEX = 6;
    + }
    + required Kind kind = 1;
    + optional uint32 column = 2;
    + optional uint64 length = 3;
    +}
    +
    +message ColumnEncoding {
    + enum Kind {
    + DIRECT = 0;
    + DICTIONARY = 1;
    + }
    + required Kind kind = 1;
    + optional uint32 dictionarySize = 2;
    +}
    +
    +message StripeFooter {
    + repeated Stream streams = 1;
    + repeated ColumnEncoding columns = 2;
    +}
    +
    +message Type {
    + enum Kind {
    + BOOLEAN = 0;
    + BYTE = 1;
    + SHORT = 2;
    + INT = 3;
    + LONG = 4;
    + FLOAT = 5;
    + DOUBLE = 6;
    + STRING = 7;
    + BINARY = 8;
    + TIMESTAMP = 9;
    + LIST = 10;
    + MAP = 11;
    + STRUCT = 12;
    + UNION = 13;
    + }
    + required Kind kind = 1;
    + repeated uint32 subtypes = 2 [packed=true];
    + repeated string fieldNames = 3;
    +}
    +
    +message StripeInformation {
    + optional uint64 offset = 1;
    + optional uint64 indexLength = 2;
    + optional uint64 dataLength = 3;
    + optional uint64 footerLength = 4;
    + optional uint64 numberOfRows = 5;
    +}
    +
    +message UserMetadataItem {
    + required string name = 1;
    + required bytes value = 2;
    +}
    +
    +message Footer {
    + optional uint64 headerLength = 1;
    + optional uint64 contentLength = 2;
    + repeated StripeInformation stripes = 3;
    + repeated Type types = 4;
    + repeated UserMetadataItem metadata = 5;
    + optional uint64 numberOfRows = 6;
    + repeated ColumnStatistics statistics = 7;
    + optional uint32 rowIndexStride = 8;
    +}
    +
    +enum CompressionKind {
    + NONE = 0;
    + ZLIB = 1;
    + SNAPPY = 2;
    + LZO = 3;
    +}
    +
    +// Serialized length must be less than 255 bytes
    +message PostScript {
    + optional uint64 footerLength = 1;
    + optional CompressionKind compression = 2;
    + optional uint64 compressionBlockSize = 3;
    +}
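
    Per the layout documented in package-info.java, the PostScript is the last message in the file and is followed only by its own one-byte length, so a reader locates the tail by working backwards from the end of the file. The following sketch (not part of the commit) uses the OrcProto classes generated from this definition; it assumes the footer is stored uncompressed (CompressionKind NONE) purely to keep the example short, whereas the actual reader also has to handle the compression indicated by the PostScript.

    import java.io.RandomAccessFile;
    import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    public class ReadOrcTail {
      public static void main(String[] args) throws Exception {
        RandomAccessFile file = new RandomAccessFile(args[0], "r");
        long len = file.length();

        // the very last byte of the file is the PostScript length
        file.seek(len - 1);
        int psLen = file.readUnsignedByte();

        // the PostScript itself immediately precedes that length byte
        byte[] psBuf = new byte[psLen];
        file.seek(len - 1 - psLen);
        file.readFully(psBuf);
        OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(psBuf);

        // the file footer precedes the PostScript; its length comes from the PostScript
        int footerLen = (int) ps.getFooterLength();
        byte[] footerBuf = new byte[footerLen];
        file.seek(len - 1 - psLen - footerLen);
        file.readFully(footerBuf);
        OrcProto.Footer footer = OrcProto.Footer.parseFrom(footerBuf);   // valid only when uncompressed

        System.out.println("rows: " + footer.getNumberOfRows()
            + " stripes: " + footer.getStripesCount()
            + " compression: " + ps.getCompression());
        file.close();
      }
    }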

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestBitFieldReader.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,143 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.nio.ByteBuffer;
    +import java.util.Random;
    +
    +import static junit.framework.Assert.assertEquals;
    +
    +public class TestBitFieldReader {
    +
    + public void runSeekTest(CompressionCodec codec) throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + final int COUNT = 16384;
    + BitFieldWriter out = new BitFieldWriter(
    + new OutStream("test", 500, codec, collect), 1);
    + TestInStream.PositionCollector[] positions =
    + new TestInStream.PositionCollector[COUNT];
    + for(int i=0; i < COUNT; ++i) {
    + positions[i] = new TestInStream.PositionCollector();
    + out.getPosition(positions[i]);
    + // test runs, non-runs
    + if (i < COUNT / 2) {
    + out.write(i & 1);
    + } else {
    + out.write((i/3) & 1);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + BitFieldReader in = new BitFieldReader(InStream.create("test", inBuf,
    + codec, 500), 1);
    + for(int i=0; i < COUNT; ++i) {
    + int x = in.next();
    + if (i < COUNT / 2) {
    + assertEquals(i & 1, x);
    + } else {
    + assertEquals((i/3) & 1, x);
    + }
    + }
    + for(int i=COUNT-1; i >= 0; --i) {
    + in.seek(positions[i]);
    + int x = in.next();
    + if (i < COUNT / 2) {
    + assertEquals(i & 1, x);
    + } else {
    + assertEquals((i/3) & 1, x);
    + }
    + }
    + }
    +
    + @Test
    + public void testUncompressedSeek() throws Exception {
    + runSeekTest(null);
    + }
    +
    + @Test
    + public void testCompressedSeek() throws Exception {
    + runSeekTest(new ZlibCodec());
    + }
    +
    + @Test
    + public void testBiggerItems() throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + final int COUNT = 16384;
    + BitFieldWriter out = new BitFieldWriter(
    + new OutStream("test", 500, null, collect), 3);
    + for(int i=0; i < COUNT; ++i) {
    + // test runs, non-runs
    + if (i < COUNT / 2) {
    + out.write(i & 7);
    + } else {
    + out.write((i/3) & 7);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + BitFieldReader in = new BitFieldReader(InStream.create("test", inBuf,
    + null, 500), 3);
    + for(int i=0; i < COUNT; ++i) {
    + int x = in.next();
    + if (i < COUNT / 2) {
    + assertEquals(i & 7, x);
    + } else {
    + assertEquals((i/3) & 7, x);
    + }
    + }
    + }
    +
    + @Test
    + public void testSkips() throws Exception {
    + TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
    + BitFieldWriter out = new BitFieldWriter(
    + new OutStream("test", 100, null, collect), 1);
    + final int COUNT = 16384;
    + for(int i=0; i < COUNT; ++i) {
    + if (i < COUNT/2) {
    + out.write(i & 1);
    + } else {
    + out.write((i/3) & 1);
    + }
    + }
    + out.flush();
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + BitFieldReader in = new BitFieldReader(InStream.create
    + ("test", inBuf, null, 100), 1);
    + for(int i=0; i < COUNT; i += 5) {
    + int x = (int) in.next();
    + if (i < COUNT/2) {
    + assertEquals(i & 1, x);
    + } else {
    + assertEquals((i/3) & 1, x);
    + }
    + if (i < COUNT - 5) {
    + in.skip(4);
    + }
    + in.skip(0);
    + }
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestDynamicArray.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,87 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import java.util.Random;
    +import org.junit.Test;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public class TestDynamicArray {
    +
    + @Test
    + public void testByteArray() throws Exception {
    + DynamicByteArray dba = new DynamicByteArray(3, 10);
    + dba.add((byte) 0);
    + dba.add((byte) 1);
    + dba.set(3, (byte) 3);
    + dba.set(2, (byte) 2);
    + dba.add((byte) 4);
    + assertEquals("{0,1,2,3,4}", dba.toString());
    + assertEquals(5, dba.size());
    + byte[] val;
    + val = new byte[0];
    + assertEquals(0, dba.compare(val, 0, 0, 2, 0));
    + assertEquals(-1, dba.compare(val, 0, 0, 2, 1));
    + val = new byte[]{3,42};
    + assertEquals(1, dba.compare(val, 0, 1, 2, 0));
    + assertEquals(1, dba.compare(val, 0, 1, 2, 1));
    + assertEquals(0, dba.compare(val, 0, 1, 3, 1));
    + assertEquals(-1, dba.compare(val, 0, 1, 3, 2));
    + assertEquals(1, dba.compare(val, 0, 2, 3, 1));
    + val = new byte[256];
    + for(int b=-128; b < 128; ++b) {
    + dba.add((byte) b);
    + val[b+128] = (byte) b;
    + }
    + assertEquals(0, dba.compare(val, 0, 256, 5, 256));
    + assertEquals(1, dba.compare(val, 0, 1, 0, 1));
    + assertEquals(1, dba.compare(val, 254, 1, 0, 1));
    + assertEquals(1, dba.compare(val, 120, 1, 64, 1));
    + val = new byte[1024];
    + Random rand = new Random(1701);
    + for(int i = 0; i < val.length; ++i) {
    + rand.nextBytes(val);
    + }
    + dba.add(val, 0, 1024);
    + assertEquals(1285, dba.size());
    + assertEquals(0, dba.compare(val, 0, 1024, 261, 1024));
    + }
    +
    + @Test
    + public void testIntArray() throws Exception {
    + DynamicIntArray dia = new DynamicIntArray(10);
    + for(int i=0; i < 10000; ++i) {
    + dia.add(2*i);
    + }
    + assertEquals(10000, dia.size());
    + for(int i=0; i < 10000; ++i) {
    + assertEquals(2*i, dia.get(i));
    + }
    + dia.clear();
    + assertEquals(0, dia.size());
    + dia.add(3);
    + dia.add(12);
    + dia.add(65);
    + assertEquals("{3,12,65}", dia.toString());
    + for(int i=0; i < 5; ++i) {
    + dia.increment(i, 3);
    + }
    + assertEquals("{6,15,68,3,3}", dia.toString());
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,132 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.fs.FileSystem;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TestName;
    +
    +import java.io.BufferedReader;
    +import java.io.File;
    +import java.io.FileOutputStream;
    +import java.io.FileReader;
    +import java.io.PrintStream;
    +import java.util.Random;
    +
    +import static org.junit.Assert.assertEquals;
    +import static org.junit.Assert.assertNull;
    +
    +public class TestFileDump {
    +
    + Path workDir = new Path(System.getProperty("test.tmp.dir",
    + "target" + File.separator + "test" + File.separator + "tmp"));
    + Path resourceDir = new Path(System.getProperty("test.build.resources",
    + "src" + File.separator + "test" + File.separator + "resources"));
    +
    + Configuration conf;
    + FileSystem fs;
    + Path testFilePath;
    +
    + @Rule
    + public TestName testCaseName = new TestName();
    +
    + @Before
    + public void openFileSystem () throws Exception {
    + conf = new Configuration();
    + fs = FileSystem.getLocal(conf);
    + fs.setWorkingDirectory(workDir);
    + testFilePath = new Path("TestFileDump." +
    + testCaseName.getMethodName() + ".orc");
    + fs.delete(testFilePath, false);
    + }
    +
    + static class MyRecord {
    + int i;
    + long l;
    + String s;
    + MyRecord(int i, long l, String s) {
    + this.i = i;
    + this.l = l;
    + this.s = s;
    + }
    + }
    +
    + private static final String outputFilename =
    + File.separator + "orc-file-dump.out";
    +
    + private static void checkOutput(String expected,
    + String actual) throws Exception {
    + BufferedReader eStream =
    + new BufferedReader(new FileReader(expected));
    + BufferedReader aStream =
    + new BufferedReader(new FileReader(actual));
    + String line = eStream.readLine();
    + while (line != null) {
    + assertEquals(line, aStream.readLine());
    + line = eStream.readLine();
    + }
    + assertNull(eStream.readLine());
    + assertNull(aStream.readLine());
    + }
    +
    + @Test
    + public void testDump() throws Exception {
    + ObjectInspector inspector;
    + synchronized (TestOrcFile.class) {
    + inspector = ObjectInspectorFactory.getReflectionObjectInspector
    + (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    + }
    + Writer writer = OrcFile.createWriter(fs, testFilePath, inspector,
    + 100000, CompressionKind.ZLIB, 10000, 10000);
    + Random r1 = new Random(1);
    + String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
    + "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
    + "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
    + "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
    + "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
    + "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
    + "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
    + "we", "had", "everything", "before", "us,", "we", "had", "nothing",
    + "before", "us,", "we", "were", "all", "going", "direct", "to",
    + "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
    + "way"};
    + for(int i=0; i < 21000; ++i) {
    + writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
    + words[r1.nextInt(words.length)]));
    + }
    + writer.close();
    + PrintStream origOut = System.out;
    + FileOutputStream myOut = new FileOutputStream(workDir + File.separator +
    + "orc-file-dump.out");
    +
    + // replace stdout and run command
    + System.setOut(new PrintStream(myOut));
    + FileDump.main(new String[]{testFilePath.toString()});
    + System.out.flush();
    + System.setOut(origOut);
    +
    + checkOutput(resourceDir + outputFilename, workDir + outputFilename);
    + }
    +}

    Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java (added)
    +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInStream.java Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,159 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.hadoop.hive.ql.io.orc;
    +
    +import org.junit.Test;
    +
    +import java.io.IOException;
    +import java.nio.ByteBuffer;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +import static junit.framework.Assert.assertEquals;
    +import static junit.framework.Assert.fail;
    +
    +public class TestInStream {
    +
    + static class OutputCollector implements OutStream.OutputReceiver {
    + DynamicByteArray buffer = new DynamicByteArray();
    +
    + @Override
    + public void output(ByteBuffer buffer) throws IOException {
    + this.buffer.add(buffer.array(), buffer.arrayOffset() + buffer.position(),
    + buffer.remaining());
    + }
    + }
    +
    + static class PositionCollector implements PositionProvider, PositionRecorder {
    + private List<Long> positions = new ArrayList<Long>();
    + private int index = 0;
    +
    + @Override
    + public long getNext() {
    + return positions.get(index++);
    + }
    +
    + @Override
    + public void addPosition(long offset) {
    + positions.add(offset);
    + }
    + }
    +
    + @Test
    + public void testUncompressed() throws Exception {
    + OutputCollector collect = new OutputCollector();
    + OutStream out = new OutStream("test", 100, null, collect);
    + PositionCollector[] positions = new PositionCollector[1024];
    + for(int i=0; i < 1024; ++i) {
    + positions[i] = new PositionCollector();
    + out.getPosition(positions[i]);
    + out.write(i);
    + }
    + out.flush();
    + assertEquals(1024, collect.buffer.size());
    + for(int i=0; i < 1024; ++i) {
    + assertEquals((byte) i, collect.buffer.get(i));
    + }
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + InStream in = InStream.create("test", inBuf, null, 100);
    + assertEquals("uncompressed stream test base: 0 offset: 0 limit: 1024",
    + in.toString());
    + for(int i=0; i < 1024; ++i) {
    + int x = in.read();
    + assertEquals(i & 0xff, x);
    + }
    + for(int i=1023; i >= 0; --i) {
    + in.seek(positions[i]);
    + assertEquals(i & 0xff, in.read());
    + }
    + }
    +
    + @Test
    + public void testCompressed() throws Exception {
    + OutputCollector collect = new OutputCollector();
    + CompressionCodec codec = new ZlibCodec();
    + OutStream out = new OutStream("test", 300, codec, collect);
    + PositionCollector[] positions = new PositionCollector[1024];
    + for(int i=0; i < 1024; ++i) {
    + positions[i] = new PositionCollector();
    + out.getPosition(positions[i]);
    + out.write(i);
    + }
    + out.flush();
    + assertEquals("test", out.toString());
    + assertEquals(961, collect.buffer.size());
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + InStream in = InStream.create("test", inBuf, codec, 300);
    + assertEquals("compressed stream test base: 0 offset: 0 limit: 961",
    + in.toString());
    + for(int i=0; i < 1024; ++i) {
    + int x = in.read();
    + assertEquals(i & 0xff, x);
    + }
    + assertEquals(0, in.available());
    + for(int i=1023; i >= 0; --i) {
    + in.seek(positions[i]);
    + assertEquals(i & 0xff, in.read());
    + }
    + }
    +
    + @Test
    + public void testCorruptStream() throws Exception {
    + OutputCollector collect = new OutputCollector();
    + CompressionCodec codec = new ZlibCodec();
    + OutStream out = new OutStream("test", 500, codec, collect);
    + PositionCollector[] positions = new PositionCollector[1024];
    + for(int i=0; i < 1024; ++i) {
    + positions[i] = new PositionCollector();
    + out.getPosition(positions[i]);
    + out.write(i);
    + }
    + out.flush();
    +
    + // now try to read the stream with a buffer that is too small
    + ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
    + collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
    + inBuf.flip();
    + InStream in = InStream.create("test", inBuf, codec, 100);
    + byte[] contents = new byte[1024];
    + try {
    + in.read(contents);
    + fail();
    + } catch(IllegalArgumentException iae) {
    + // EXPECTED
    + }
    +
    + // make a corrupted header
    + inBuf.clear();
    + inBuf.put((byte) 32);
    + inBuf.put((byte) 0);
    + inBuf.flip();
    + in = InStream.create("test2", inBuf, codec, 300);
    + try {
    + in.read();
    + fail();
    + } catch (IllegalStateException ise) {
    + // EXPECTED
    + }
    + }
    +}
    Added: hive/trunk/ql/src/test/resources/orc-file-dump.out
    URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump.out?rev=1452992&view=auto
    ==============================================================================
    --- hive/trunk/ql/src/test/resources/orc-file-dump.out (added)
    +++ hive/trunk/ql/src/test/resources/orc-file-dump.out Tue Mar 5 20:44:50 2013
    @@ -0,0 +1,85 @@
    +Structure for TestFileDump.null.orc
    +Rows: 21000
    +Compression: ZLIB
    +Compression size: 10000
    +Type: struct<i:int,l:bigint,s:string>
    +
    +Statistics:
    + Column 0: count: 21000
    + Column 1: count: 21000 min: -2146993718 max: 2147378179 sum: 193017464403
    + Column 2: count: 21000 min: -9222758097219661129 max: 9222303228623055266
    + Column 3: count: 21000 min: Darkness, max: worst
    +
    +Stripes:
    + Stripe: offset: 3 data: 83505 rows: 6000 tail: 91 index: 179
    + Stream: column 0 section ROW_INDEX start: 3 length 10
    + Stream: column 1 section ROW_INDEX start: 13 length 38
    + Stream: column 2 section ROW_INDEX start: 51 length 42
    + Stream: column 3 section DICTIONARY_COUNT start: 93 length 53
    + Stream: column 3 section ROW_INDEX start: 146 length 36
    + Stream: column 1 section PRESENT start: 182 length 11
    + Stream: column 1 section DATA start: 193 length 27086
    + Stream: column 2 section PRESENT start: 27279 length 11
    + Stream: column 2 section DATA start: 27290 length 52124
    + Stream: column 3 section PRESENT start: 79414 length 11
    + Stream: column 3 section DATA start: 79425 length 4091
    + Stream: column 3 section LENGTH start: 83516 length 38
    + Stream: column 3 section DICTIONARY_DATA start: 83554 length 133
    + Encoding column 0: DIRECT
    + Encoding column 1: DIRECT
    + Encoding column 2: DIRECT
    + Encoding column 3: DICTIONARY[35]
    + Stripe: offset: 83778 data: 83453 rows: 6000 tail: 91 index: 180
    + Stream: column 0 section ROW_INDEX start: 83778 length 10
    + Stream: column 1 section ROW_INDEX start: 83788 length 39
    + Stream: column 2 section ROW_INDEX start: 83827 length 42
    + Stream: column 3 section DICTIONARY_COUNT start: 83869 length 53
    + Stream: column 3 section ROW_INDEX start: 83922 length 36
    + Stream: column 1 section PRESENT start: 83958 length 11
    + Stream: column 1 section DATA start: 83969 length 27093
    + Stream: column 2 section PRESENT start: 111062 length 11
    + Stream: column 2 section DATA start: 111073 length 52119
    + Stream: column 3 section PRESENT start: 163192 length 11
    + Stream: column 3 section DATA start: 163203 length 4037
    + Stream: column 3 section LENGTH start: 167240 length 38
    + Stream: column 3 section DICTIONARY_DATA start: 167278 length 133
    + Encoding column 0: DIRECT
    + Encoding column 1: DIRECT
    + Encoding column 2: DIRECT
    + Encoding column 3: DICTIONARY[35]
    + Stripe: offset: 167502 data: 83456 rows: 6000 tail: 92 index: 182
    + Stream: column 0 section ROW_INDEX start: 167502 length 10
    + Stream: column 1 section ROW_INDEX start: 167512 length 39
    + Stream: column 2 section ROW_INDEX start: 167551 length 42
    + Stream: column 3 section DICTIONARY_COUNT start: 167593 length 55
    + Stream: column 3 section ROW_INDEX start: 167648 length 36
    + Stream: column 1 section PRESENT start: 167684 length 11
    + Stream: column 1 section DATA start: 167695 length 27080
    + Stream: column 2 section PRESENT start: 194775 length 11
    + Stream: column 2 section DATA start: 194786 length 52093
    + Stream: column 3 section PRESENT start: 246879 length 11
    + Stream: column 3 section DATA start: 246890 length 4079
    + Stream: column 3 section LENGTH start: 250969 length 38
    + Stream: column 3 section DICTIONARY_DATA start: 251007 length 133
    + Encoding column 0: DIRECT
    + Encoding column 1: DIRECT
    + Encoding column 2: DIRECT
    + Encoding column 3: DICTIONARY[35]
    + Stripe: offset: 251232 data: 41842 rows: 3000 tail: 90 index: 172
    + Stream: column 0 section ROW_INDEX start: 251232 length 10
    + Stream: column 1 section ROW_INDEX start: 251242 length 39
    + Stream: column 2 section ROW_INDEX start: 251281 length 43
    + Stream: column 3 section DICTIONARY_COUNT start: 251324 length 44
    + Stream: column 3 section ROW_INDEX start: 251368 length 36
    + Stream: column 1 section PRESENT start: 251404 length 9
    + Stream: column 1 section DATA start: 251413 length 13544
    + Stream: column 2 section PRESENT start: 264957 length 9
    + Stream: column 2 section DATA start: 264966 length 26072
    + Stream: column 3 section PRESENT start: 291038 length 9
    + Stream: column 3 section DATA start: 291047 length 2028
    + Stream: column 3 section LENGTH start: 293075 length 38
    + Stream: column 3 section DICTIONARY_DATA start: 293113 length 133
    + Encoding column 0: DIRECT
    + Encoding column 1: DIRECT
    + Encoding column 2: DIRECT
    + Encoding column 3: DICTIONARY[35]
