FAQ
Hi experts,

I have a question on IFileOutputStream.
I suspect that when writing key/value lengths (1 or 2 bytes) to an IFile, this method is inefficient:
public void write(byte[] b, int off, int len) throws IOException {
sum.update(b, off,len);
out.write(b,off,len);
}
I made the following changes and saw some improvement under certain circumstances.
I look forward to your expert feedback.

Regards,
Kungu



@@ -36,43 +36,128 @@
private final DataChecksum sum;
private byte[] barray;
private boolean closed = false;
+
+ /**
+ * The internal buffer where data is stored.
+ */
+ protected byte buf[];

/**
- * Create a checksum output stream that writes
- * the bytes to the given stream.
- * @param out
+ * The number of valid bytes in the buffer. This value is always
+ * in the range <tt>0</tt> through <tt>buf.length</tt>; elements
+ * <tt>buf[0]</tt> through <tt>buf[count-1]</tt> contain valid
+ * byte data.
*/
+ protected int count;
+
+ /**
+ * Creates a new checksum buffered output stream to write data to
+ * the specified underlying output stream.
+ *
+ * @param out the underlying output stream.
+ */
public IFileOutputStream(OutputStream out) {
- super(out);
- sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
- Integer.MAX_VALUE);
- barray = new byte[sum.getChecksumSize()];
+ this(out, 8192);
}
-
+
+ /**
+ * Creates a new buffered output stream to write data to the
+ * specified underlying output stream with the specified buffer
+ * size.
+ *
+ * @param out the underlying output stream.
+ * @param size the buffer size.
+ * @exception IllegalArgumentException if size &lt;= 0.
+ */
+ public IFileOutputStream(OutputStream out, int size) {
+ super(out);
+ if (size <= 0) {
+ throw new IllegalArgumentException("Buffer size <= 0");
+ }
+ buf = new byte[size];
+
+ sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
+ Integer.MAX_VALUE);
+ barray = new byte[sum.getChecksumSize()];
+ }
+
+ /** Flush the internal buffer */
+ private void flushBuffer() throws IOException {
+ if (count > 0) {
+ sum.update(buf, 0, count);
+ out.write(buf, 0, count);
+ count = 0;
+ }
+ }
+
+ /**
+ * Writes the specified byte to this buffered output stream.
+ *
+ * @param b the byte to be written.
+ * @exception IOException if an I/O error occurs.
+ */
+ public void write(int b) throws IOException {
+ if (count >= buf.length) {
+ flushBuffer();
+ }
+ buf[count++] = (byte)(b & 0xFF);
+ }
+
+ /**
+ * Writes <code>len</code> bytes from the specified byte array
+ * starting at offset <code>off</code> to this buffered output stream.
+ *
+ * <p> Ordinarily this method stores bytes from the given array into this
+ * stream's buffer, flushing the buffer to the underlying output stream as
+ * needed. If the requested length is at least as large as this stream's
+ * buffer, however, then this method will flush the buffer and write the
+ * bytes directly to the underlying output stream. Thus redundant
+ * <code>BufferedOutputStream</code>s will not copy data unnecessarily.
+ *
+ * @param b the data.
+ * @param off the start offset in the data.
+ * @param len the number of bytes to write.
+ * @exception IOException if an I/O error occurs.
+ */
+ public void write(byte b[], int off, int len) throws IOException {
+ if (len >= buf.length) {
+ /* If the request length exceeds the size of the output buffer,
+ flush the output buffer and then write the data directly.
+ In this way buffered streams will cascade harmlessly. */
+ flushBuffer();
+ sum.update(b, off, len);
+ out.write(b, off, len);
+ return;
+ }
+ if (len > buf.length - count) {
+ flushBuffer();
+ }
+ System.arraycopy(b, off, buf, count, len);
+ count += len;
+ }
+
+ /**
+ * Flushes this buffered output stream. This forces any buffered
+ * output bytes to be written out to the underlying output stream.
+ *
+ * @exception IOException if an I/O error occurs.
+ * @see java.io.FilterOutputStream#out
+ */
+ public void flush() throws IOException {
+ flushBuffer();
+ out.flush();
+ }
+
@Override
public void close() throws IOException {
if (closed) {
return;
}
closed = true;
+ flushBuffer();
sum.writeValue(barray, 0, false);
out.write (barray, 0, sum.getChecksumSize());
out.flush();
}
-
- /**
- * Write bytes to the stream.
- */
- @Override
- public void write(byte[] b, int off, int len) throws IOException {
- sum.update(b, off,len);
- out.write(b,off,len);
- }
-
- @Override
- public void write(int b) throws IOException {
- barray[0] = (byte) (b & 0xFF);
- write(barray,0,1);
- }

}

________________________________

This email (including any attachments) is confidential and may be legally privileged. If you received this email in error, please delete it immediately and do not copy it or use it for any purpose or disclose its contents to any other person. Thank you.

本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用途、或透露本邮件之内容。谢谢。

Search Discussions

  • Arun C Murthy at Feb 20, 2012 at 2:15 pm
    Kungu,

    Can you please open a jira and attach this patch there?

    More details here: http://wiki.apache.org/hadoop/HowToContribute

    thanks!
    Arun
    On Feb 20, 2012, at 1:23 PM, 坤谷 wrote:

    Hi experts,

    I have a question on IFileOutputStream.
    I suspect that when writing key/value lengths (1 or 2 bytes) to an IFile, this method is inefficient:
    public void write(byte[] b, int off, int len) throws IOException {
    sum.update(b, off,len);
    out.write(b,off,len);
    }
    I made the following changes and saw some improvement under certain circumstances.
    I look forward to your expert feedback.

    Regards,
    Kungu



    @@ -36,43 +36,128 @@
    private final DataChecksum sum;
    private byte[] barray;
    private boolean closed = false;
    +
    + /**
    + * The internal buffer where data is stored.
    + */
    + protected byte buf[];

    /**
    - * Create a checksum output stream that writes
    - * the bytes to the given stream.
    - * @param out
    + * The number of valid bytes in the buffer. This value is always
    + * in the range <tt>0</tt> through <tt>buf.length</tt>; elements
    + * <tt>buf[0]</tt> through <tt>buf[count-1]</tt> contain valid
    + * byte data.
    */
    + protected int count;
    +
    + /**
    + * Creates a new checksum buffered output stream to write data to
    + * the specified underlying output stream.
    + *
    + * @param out the underlying output stream.
    + */
    public IFileOutputStream(OutputStream out) {
    - super(out);
    - sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
    - Integer.MAX_VALUE);
    - barray = new byte[sum.getChecksumSize()];
    + this(out, 8192);
    }
    -
    +
    + /**
    + * Creates a new buffered output stream to write data to the
    + * specified underlying output stream with the specified buffer
    + * size.
    + *
    + * @param out the underlying output stream.
    + * @param size the buffer size.
    + * @exception IllegalArgumentException if size &lt;= 0.
    + */
    + public IFileOutputStream(OutputStream out, int size) {
    + super(out);
    + if (size <= 0) {
    + throw new IllegalArgumentException("Buffer size <= 0");
    + }
    + buf = new byte[size];
    +
    + sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
    + Integer.MAX_VALUE);
    + barray = new byte[sum.getChecksumSize()];
    + }
    +
    + /** Flush the internal buffer */
    + private void flushBuffer() throws IOException {
    + if (count > 0) {
    + sum.update(buf, 0, count);
    + out.write(buf, 0, count);
    + count = 0;
    + }
    + }
    +
    + /**
    + * Writes the specified byte to this buffered output stream.
    + *
    + * @param b the byte to be written.
    + * @exception IOException if an I/O error occurs.
    + */
    + public void write(int b) throws IOException {
    + if (count >= buf.length) {
    + flushBuffer();
    + }
    + buf[count++] = (byte)(b & 0xFF);
    + }
    +
    + /**
    + * Writes <code>len</code> bytes from the specified byte array
    + * starting at offset <code>off</code> to this buffered output stream.
    + *
    + * <p> Ordinarily this method stores bytes from the given array into this
    + * stream's buffer, flushing the buffer to the underlying output stream as
    + * needed. If the requested length is at least as large as this stream's
    + * buffer, however, then this method will flush the buffer and write the
    + * bytes directly to the underlying output stream. Thus redundant
    + * <code>BufferedOutputStream</code>s will not copy data unnecessarily.
    + *
    + * @param b the data.
    + * @param off the start offset in the data.
    + * @param len the number of bytes to write.
    + * @exception IOException if an I/O error occurs.
    + */
    + public void write(byte b[], int off, int len) throws IOException {
    + if (len >= buf.length) {
    + /* If the request length exceeds the size of the output buffer,
    + flush the output buffer and then write the data directly.
    + In this way buffered streams will cascade harmlessly. */
    + flushBuffer();
    + sum.update(b, off, len);
    + out.write(b, off, len);
    + return;
    + }
    + if (len > buf.length - count) {
    + flushBuffer();
    + }
    + System.arraycopy(b, off, buf, count, len);
    + count += len;
    + }
    +
    + /**
    + * Flushes this buffered output stream. This forces any buffered
    + * output bytes to be written out to the underlying output stream.
    + *
    + * @exception IOException if an I/O error occurs.
    + * @see java.io.FilterOutputStream#out
    + */
    + public void flush() throws IOException {
    + flushBuffer();
    + out.flush();
    + }
    +
    @Override
    public void close() throws IOException {
    if (closed) {
    return;
    }
    closed = true;
    + flushBuffer();
    sum.writeValue(barray, 0, false);
    out.write (barray, 0, sum.getChecksumSize());
    out.flush();
    }
    -
    - /**
    - * Write bytes to the stream.
    - */
    - @Override
    - public void write(byte[] b, int off, int len) throws IOException {
    - sum.update(b, off,len);
    - out.write(b,off,len);
    - }
    -
    - @Override
    - public void write(int b) throws IOException {
    - barray[0] = (byte) (b & 0xFF);
    - write(barray,0,1);
    - }

    }

    ________________________________

    This email (including any attachments) is confidential and may be legally privileged. If you received this email in error, please delete it immediately and do not copy it or use it for any purpose or disclose its contents to any other person. Thank you.

    本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用途、或透露本邮件之内容。谢谢。
    --
    Arun C. Murthy
    Hortonworks Inc.
    http://hortonworks.com/

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: common-dev @
categories: hadoop
posted: Feb 20, '12 at 2:14p
active: Feb 20, '12 at 2:15p
posts: 2
users: 2
website: hadoop.apache.org...
irc: #hadoop

2 users in discussion

坤谷: 1 post Arun C Murthy: 1 post

People

Translate

site design / logo © 2021 Grokbase