FAQ
Author: sershe
Date: Tue Feb 10 22:53:03 2015
New Revision: 1658830

URL: http://svn.apache.org/r1658830
Log:
HIVE-9419p1: LLAP: ORC decoding of row-groups - add stream kind for ease of decoding

Modified:
     hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/EncodedColumnBatch.java
     hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
     hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java

Modified: hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/EncodedColumnBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/EncodedColumnBatch.java?rev=1658830&r1=1658829&r2=1658830&view=diff
==============================================================================
--- hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/EncodedColumnBatch.java (original)
+++ hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/EncodedColumnBatch.java Tue Feb 10 22:53:03 2015
@@ -31,10 +31,16 @@ public class EncodedColumnBatch<BatchKey
      // Decoder knows which stream this belongs to, and each buffer is a compression block,
      // so he can figure out the offsets from metadata.
      public List<LlapMemoryBuffer> cacheBuffers;
+ public int streamKind;

      // StreamBuffer can be reused for many RGs (e.g. dictionary case). To avoid locking every
      // LlapMemoryBuffer 500 times, have a separate refcount on StreamBuffer itself.
      public AtomicInteger refCount = new AtomicInteger(0);
+
+ public StreamBuffer(int kind) {
+ this.streamKind = kind;
+ }
+
      public void incRef() {
        refCount.incrementAndGet();
      }

Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java?rev=1658830&r1=1658829&r2=1658830&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java (original)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java Tue Feb 10 22:53:03 2015
@@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.exec.ve
  import org.apache.hadoop.hive.ql.io.orc.CompressionCodec;
  import org.apache.hadoop.hive.ql.io.orc.OrcProto;

-import com.clearspring.analytics.util.Lists;
+import com.google.common.collect.Lists;

  public class OrcColumnVectorProducer extends ColumnVectorProducer<OrcBatchKey> {
    private final OrcEncodedDataProducer edp;

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1658830&r1=1658829&r2=1658830&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Feb 10 22:53:03 2015
@@ -3701,7 +3701,7 @@ public class RecordReaderImpl implements
                  endCOffset = estimateRgEndOffset(isCompressed, isLastRg, isLastRg
                      ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset),
                      sctx.length, bufferSize);
- cb = new StreamBuffer();
+ cb = new StreamBuffer(sctx.kind.getNumber());
              cb.incRef();
              if (DebugUtils.isTraceOrcEnabled()) {
                LOG.info("Getting data for column "+ ctx.colIx + " " + (isLastRg ? "last " : "")
@@ -3722,14 +3722,14 @@ public class RecordReaderImpl implements
    }

    /**
- * Reads the entire stream for a column (e.g. a dictionarty stream), or gets it from context.
+ * Reads the entire stream for a column (e.g. a dictionary stream), or gets it from context.
     * @param isLastRg Whether the stream is being read for last RG in stripe.
     * @return StreamBuffer that contains the entire stream.
     */
    private StreamBuffer getStripeLevelStream(long baseOffset, StreamContext ctx,
        LowLevelCache cache, boolean isLastRg) throws IOException {
      if (ctx.stripeLevelStream == null) {
- ctx.stripeLevelStream = new StreamBuffer();
+ ctx.stripeLevelStream = new StreamBuffer(ctx.kind.getNumber());
        // We will be using this for each RG while also sending RGs to processing.
        // To avoid buffers being unlocked, run refcount one ahead; we will not increase
        // it when building the last RG, so each RG processing will decref once, and the

Search Discussions

Related Discussions

Discussion Navigation
view thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Feb 10, '15 at 10:53p
active: Feb 10, '15 at 10:53p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Sershe: 1 post

People

Translate

site design / logo © 2021 Grokbase