Repository: hive
Updated Branches:
   refs/heads/branch-1 eb582ed84 -> 81d732889


HIVE-11523: org.apache.hadoop.hive.ql.io.orc.FileDump should handle errors (Prasanth Jayachandran reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/81d73288
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/81d73288
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/81d73288

Branch: refs/heads/branch-1
Commit: 81d732889dbc13597abab5409418ca28e69ade10
Parents: eb582ed
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Mon Oct 26 14:20:53 2015 -0500
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Mon Oct 26 14:20:53 2015 -0500

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/io/orc/FileDump.java | 192 +++++++++--------
  .../hadoop/hive/ql/io/orc/JsonFileDump.java | 209 ++++++++++---------
  .../hadoop/hive/ql/io/orc/TestFileDump.java | 50 -----
  3 files changed, 211 insertions(+), 240 deletions(-)
----------------------------------------------------------------------
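The core of the patch is a per-file try/catch in FileDump.printData and printMetaData: a corrupt or unreadable ORC file is reported on stderr and skipped instead of aborting the whole dump. Below is a minimal sketch of that pattern, not the committed code; dumpOneFile is a hypothetical stand-in for the OrcFile.createReader and printing logic, and Guava's Strings.repeat is used just as the patch itself does.

import java.util.Arrays;
import java.util.List;
import com.google.common.base.Strings;

public class PerFileDumpSketch {
  public static void main(String[] args) {
    List<String> files = Arrays.asList(args);
    for (String file : files) {
      try {
        dumpOneFile(file);                       // stand-in for the ORC reader + print logic
        if (files.size() > 1) {
          System.out.println(Strings.repeat("=", 80) + "\n");
        }
      } catch (Exception e) {
        // report the failure and keep going with the remaining files
        System.err.println("Unable to dump data for file: " + file);
        e.printStackTrace();
        System.err.println(Strings.repeat("=", 80) + "\n");
      }
    }
  }

  // hypothetical per-file work; the real reader calls can throw on corrupt input
  private static void dumpOneFile(String file) throws Exception {
    System.out.println("Structure for " + file);
  }
}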


http://git-wip-us.apache.org/repos/asf/hive/blob/81d73288/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 76ecb33..fa1543b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -136,9 +136,16 @@ public final class FileDump {
    private static void printData(List<String> files, Configuration conf) throws IOException,
        JSONException {
      for (String file : files) {
- printJsonData(conf, file);
- if (files.size() > 1) {
- System.out.println(Strings.repeat("=", 80) + "\n");
+ try {
+ printJsonData(conf, file);
+ if (files.size() > 1) {
+ System.out.println(Strings.repeat("=", 80) + "\n");
+ }
+ } catch (Exception e) {
+ System.err.println("Unable to dump data for file: " + file);
+ e.printStackTrace();
+ System.err.println(Strings.repeat("=", 80) + "\n");
+ continue;
        }
      }
    }
@@ -146,103 +153,110 @@ public final class FileDump {
    private static void printMetaData(List<String> files, Configuration conf,
        List<Integer> rowIndexCols, boolean printTimeZone) throws IOException {
      for (String filename : files) {
- System.out.println("Structure for " + filename);
- Path path = new Path(filename);
- Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
- System.out.println("File Version: " + reader.getFileVersion().getName() +
- " with " + reader.getWriterVersion());
- RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
- System.out.println("Rows: " + reader.getNumberOfRows());
- System.out.println("Compression: " + reader.getCompression());
- if (reader.getCompression() != CompressionKind.NONE) {
- System.out.println("Compression size: " + reader.getCompressionSize());
- }
- System.out.println("Type: " + reader.getObjectInspector().getTypeName());
- System.out.println("\nStripe Statistics:");
- Metadata metadata = reader.getMetadata();
- for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
- System.out.println(" Stripe " + (n + 1) + ":");
- StripeStatistics ss = metadata.getStripeStatistics().get(n);
- for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
- System.out.println(" Column " + i + ": " +
- ss.getColumnStatistics()[i].toString());
+ try {
+ Path path = new Path(filename);
+ Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ System.out.println("Structure for " + filename);
+ System.out.println("File Version: " + reader.getFileVersion().getName() +
+ " with " + reader.getWriterVersion());
+ RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+ System.out.println("Rows: " + reader.getNumberOfRows());
+ System.out.println("Compression: " + reader.getCompression());
+ if (reader.getCompression() != CompressionKind.NONE) {
+ System.out.println("Compression size: " + reader.getCompressionSize());
          }
- }
- ColumnStatistics[] stats = reader.getStatistics();
- int colCount = stats.length;
- System.out.println("\nFile Statistics:");
- for (int i = 0; i < stats.length; ++i) {
- System.out.println(" Column " + i + ": " + stats[i].toString());
- }
- System.out.println("\nStripes:");
- int stripeIx = -1;
- for (StripeInformation stripe : reader.getStripes()) {
- ++stripeIx;
- long stripeStart = stripe.getOffset();
- OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
- if (printTimeZone) {
- String tz = footer.getWriterTimezone();
- if (tz == null || tz.isEmpty()) {
- tz = UNKNOWN;
+ System.out.println("Type: " + reader.getObjectInspector().getTypeName());
+ System.out.println("\nStripe Statistics:");
+ Metadata metadata = reader.getMetadata();
+ for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
+ System.out.println(" Stripe " + (n + 1) + ":");
+ StripeStatistics ss = metadata.getStripeStatistics().get(n);
+ for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
+ System.out.println(" Column " + i + ": " +
+ ss.getColumnStatistics()[i].toString());
            }
- System.out.println(" Stripe: " + stripe.toString() + " timezone: " + tz);
- } else {
- System.out.println(" Stripe: " + stripe.toString());
          }
- long sectionStart = stripeStart;
- for (OrcProto.Stream section : footer.getStreamsList()) {
- String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
- System.out.println(" Stream: column " + section.getColumn() +
- " section " + kind + " start: " + sectionStart +
- " length " + section.getLength());
- sectionStart += section.getLength();
+ ColumnStatistics[] stats = reader.getStatistics();
+ int colCount = stats.length;
+ System.out.println("\nFile Statistics:");
+ for (int i = 0; i < stats.length; ++i) {
+ System.out.println(" Column " + i + ": " + stats[i].toString());
          }
- for (int i = 0; i < footer.getColumnsCount(); ++i) {
- OrcProto.ColumnEncoding encoding = footer.getColumns(i);
- StringBuilder buf = new StringBuilder();
- buf.append(" Encoding column ");
- buf.append(i);
- buf.append(": ");
- buf.append(encoding.getKind());
- if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
- encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
- buf.append("[");
- buf.append(encoding.getDictionarySize());
- buf.append("]");
+ System.out.println("\nStripes:");
+ int stripeIx = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ ++stripeIx;
+ long stripeStart = stripe.getOffset();
+ OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+ if (printTimeZone) {
+ String tz = footer.getWriterTimezone();
+ if (tz == null || tz.isEmpty()) {
+ tz = UNKNOWN;
+ }
+ System.out.println(" Stripe: " + stripe.toString() + " timezone: " + tz);
+ } else {
+ System.out.println(" Stripe: " + stripe.toString());
            }
- System.out.println(buf);
- }
- if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
- // include the columns that are specified, only if the columns are included, bloom filter
- // will be read
- boolean[] sargColumns = new boolean[colCount];
- for (int colIdx : rowIndexCols) {
- sargColumns[colIdx] = true;
+ long sectionStart = stripeStart;
+ for (OrcProto.Stream section : footer.getStreamsList()) {
+ String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
+ System.out.println(" Stream: column " + section.getColumn() +
+ " section " + kind + " start: " + sectionStart +
+ " length " + section.getLength());
+ sectionStart += section.getLength();
            }
- RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
- for (int col : rowIndexCols) {
+ for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ OrcProto.ColumnEncoding encoding = footer.getColumns(i);
              StringBuilder buf = new StringBuilder();
- String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
- buf.append(rowIdxString);
- String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
- buf.append(bloomFilString);
+ buf.append(" Encoding column ");
+ buf.append(i);
+ buf.append(": ");
+ buf.append(encoding.getKind());
+ if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+ encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+ buf.append("[");
+ buf.append(encoding.getDictionarySize());
+ buf.append("]");
+ }
              System.out.println(buf);
            }
+ if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+ // include the columns that are specified, only if the columns are included, bloom filter
+ // will be read
+ boolean[] sargColumns = new boolean[colCount];
+ for (int colIdx : rowIndexCols) {
+ sargColumns[colIdx] = true;
+ }
+ RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+ for (int col : rowIndexCols) {
+ StringBuilder buf = new StringBuilder();
+ String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
+ buf.append(rowIdxString);
+ String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
+ buf.append(bloomFilString);
+ System.out.println(buf);
+ }
+ }
          }
- }

- FileSystem fs = path.getFileSystem(conf);
- long fileLen = fs.getContentSummary(path).getLength();
- long paddedBytes = getTotalPaddingSize(reader);
- // empty ORC file is ~45 bytes. Assumption here is file length always >0
- double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
- DecimalFormat format = new DecimalFormat("##.##");
- System.out.println("\nFile length: " + fileLen + " bytes");
- System.out.println("Padding length: " + paddedBytes + " bytes");
- System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
- rows.close();
- if (files.size() > 1) {
- System.out.println(Strings.repeat("=", 80) + "\n");
+ FileSystem fs = path.getFileSystem(conf);
+ long fileLen = fs.getContentSummary(path).getLength();
+ long paddedBytes = getTotalPaddingSize(reader);
+ // empty ORC file is ~45 bytes. Assumption here is file length always >0
+ double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+ DecimalFormat format = new DecimalFormat("##.##");
+ System.out.println("\nFile length: " + fileLen + " bytes");
+ System.out.println("Padding length: " + paddedBytes + " bytes");
+ System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
+ rows.close();
+ if (files.size() > 1) {
+ System.out.println(Strings.repeat("=", 80) + "\n");
+ }
+ } catch (Exception e) {
+ System.err.println("Unable to dump metadata for file: " + filename);
+ e.printStackTrace();
+ System.err.println(Strings.repeat("=", 80) + "\n");
+ continue;
        }
      }
    }

http://git-wip-us.apache.org/repos/asf/hive/blob/81d73288/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
index c33004e..18b1f24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
@@ -45,130 +45,137 @@ public class JsonFileDump {
        writer.object();
      }
      for (String filename : files) {
- if (multiFile) {
- writer.object();
- }
- writer.key("fileName").value(filename);
- Path path = new Path(filename);
- Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
- writer.key("fileVersion").value(reader.getFileVersion().getName());
- writer.key("writerVersion").value(reader.getWriterVersion());
- RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
- writer.key("numberOfRows").value(reader.getNumberOfRows());
- writer.key("compression").value(reader.getCompression());
- if (reader.getCompression() != CompressionKind.NONE) {
- writer.key("compressionBufferSize").value(reader.getCompressionSize());
- }
- writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
- writer.key("schema").array();
- writeSchema(writer, reader.getTypes());
- writer.endArray();
-
- writer.key("stripeStatistics").array();
- Metadata metadata = reader.getMetadata();
- for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
- writer.object();
- writer.key("stripeNumber").value(n + 1);
- StripeStatistics ss = metadata.getStripeStatistics().get(n);
- writer.key("columnStatistics").array();
- for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+ try {
+ if (multiFile) {
            writer.object();
- writer.key("columnId").value(i);
- writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
- writer.endObject();
          }
- writer.endArray();
- writer.endObject();
- }
- writer.endArray();
-
- ColumnStatistics[] stats = reader.getStatistics();
- int colCount = stats.length;
- writer.key("fileStatistics").array();
- for (int i = 0; i < stats.length; ++i) {
- writer.object();
- writer.key("columnId").value(i);
- writeColumnStatistics(writer, stats[i]);
- writer.endObject();
- }
- writer.endArray();
-
- writer.key("stripes").array();
- int stripeIx = -1;
- for (StripeInformation stripe : reader.getStripes()) {
- ++stripeIx;
- long stripeStart = stripe.getOffset();
- OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
- writer.object(); // start of stripe information
- writer.key("stripeNumber").value(stripeIx + 1);
- writer.key("stripeInformation");
- writeStripeInformation(writer, stripe);
- if (printTimeZone) {
- writer.key("writerTimezone").value(
- footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+ writer.key("fileName").value(filename);
+ Path path = new Path(filename);
+ Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ writer.key("fileVersion").value(reader.getFileVersion().getName());
+ writer.key("writerVersion").value(reader.getWriterVersion());
+ RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+ writer.key("numberOfRows").value(reader.getNumberOfRows());
+ writer.key("compression").value(reader.getCompression());
+ if (reader.getCompression() != CompressionKind.NONE) {
+ writer.key("compressionBufferSize").value(reader.getCompressionSize());
          }
- long sectionStart = stripeStart;
+ writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
+ writer.key("schema").array();
+ writeSchema(writer, reader.getTypes());
+ writer.endArray();

- writer.key("streams").array();
- for (OrcProto.Stream section : footer.getStreamsList()) {
+ writer.key("stripeStatistics").array();
+ Metadata metadata = reader.getMetadata();
+ for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
            writer.object();
- String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
- writer.key("columnId").value(section.getColumn());
- writer.key("section").value(kind);
- writer.key("startOffset").value(sectionStart);
- writer.key("length").value(section.getLength());
- sectionStart += section.getLength();
+ writer.key("stripeNumber").value(n + 1);
+ StripeStatistics ss = metadata.getStripeStatistics().get(n);
+ writer.key("columnStatistics").array();
+ for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+ writer.endObject();
+ }
+ writer.endArray();
            writer.endObject();
          }
          writer.endArray();

- writer.key("encodings").array();
- for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ ColumnStatistics[] stats = reader.getStatistics();
+ int colCount = stats.length;
+ writer.key("fileStatistics").array();
+ for (int i = 0; i < stats.length; ++i) {
            writer.object();
- OrcProto.ColumnEncoding encoding = footer.getColumns(i);
            writer.key("columnId").value(i);
- writer.key("kind").value(encoding.getKind());
- if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
- encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
- writer.key("dictionarySize").value(encoding.getDictionarySize());
- }
+ writeColumnStatistics(writer, stats[i]);
            writer.endObject();
          }
          writer.endArray();

- if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
- // include the columns that are specified, only if the columns are included, bloom filter
- // will be read
- boolean[] sargColumns = new boolean[colCount];
- for (int colIdx : rowIndexCols) {
- sargColumns[colIdx] = true;
+ writer.key("stripes").array();
+ int stripeIx = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ ++stripeIx;
+ long stripeStart = stripe.getOffset();
+ OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+ writer.object(); // start of stripe information
+ writer.key("stripeNumber").value(stripeIx + 1);
+ writer.key("stripeInformation");
+ writeStripeInformation(writer, stripe);
+ if (printTimeZone) {
+ writer.key("writerTimezone").value(
+ footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
            }
- RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
- writer.key("indexes").array();
- for (int col : rowIndexCols) {
+ long sectionStart = stripeStart;
+
+ writer.key("streams").array();
+ for (OrcProto.Stream section : footer.getStreamsList()) {
              writer.object();
- writer.key("columnId").value(col);
- writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
- writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+ String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+ writer.key("columnId").value(section.getColumn());
+ writer.key("section").value(kind);
+ writer.key("startOffset").value(sectionStart);
+ writer.key("length").value(section.getLength());
+ sectionStart += section.getLength();
              writer.endObject();
            }
            writer.endArray();
+
+ writer.key("encodings").array();
+ for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ writer.object();
+ OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+ writer.key("columnId").value(i);
+ writer.key("kind").value(encoding.getKind());
+ if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+ encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+ writer.key("dictionarySize").value(encoding.getDictionarySize());
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+
+ if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+ // include the columns that are specified, only if the columns are included, bloom filter
+ // will be read
+ boolean[] sargColumns = new boolean[colCount];
+ for (int colIdx : rowIndexCols) {
+ sargColumns[colIdx] = true;
+ }
+ RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+ writer.key("indexes").array();
+ for (int col : rowIndexCols) {
+ writer.object();
+ writer.key("columnId").value(col);
+ writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+ writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ writer.endObject(); // end of stripe information
          }
- writer.endObject(); // end of stripe information
- }
- writer.endArray();
+ writer.endArray();

- FileSystem fs = path.getFileSystem(conf);
- long fileLen = fs.getContentSummary(path).getLength();
- long paddedBytes = FileDump.getTotalPaddingSize(reader);
- // empty ORC file is ~45 bytes. Assumption here is file length always >0
- double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
- writer.key("fileLength").value(fileLen);
- writer.key("paddingLength").value(paddedBytes);
- writer.key("paddingRatio").value(percentPadding);
- rows.close();
+ FileSystem fs = path.getFileSystem(conf);
+ long fileLen = fs.getContentSummary(path).getLength();
+ long paddedBytes = FileDump.getTotalPaddingSize(reader);
+ // empty ORC file is ~45 bytes. Assumption here is file length always >0
+ double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+ writer.key("fileLength").value(fileLen);
+ writer.key("paddingLength").value(paddedBytes);
+ writer.key("paddingRatio").value(percentPadding);
+ rows.close();

- writer.endObject();
+ writer.endObject();
+ } catch (Exception e) {
+ writer.key("status").value("FAILED");
+ System.err.println("Unable to dump data for file: " + filename);
+ e.printStackTrace();
+ throw e;
+ }
      }
      if (multiFile) {
        writer.endArray();
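JsonFileDump takes a slightly different tack from FileDump: its per-file catch block writes a "status" of FAILED into the current JSON object, logs to stderr, and then rethrows instead of continuing, so the failure is visible both in the JSON output and to the caller. A minimal sketch of that shape follows, assuming an org.json-style JSONStringer; the actual writer class used by JsonFileDump is not shown in this diff, and the file name below is hypothetical.

import org.json.JSONException;
import org.json.JSONStringer;

public class JsonDumpErrorSketch {
  public static void main(String[] args) throws JSONException {
    JSONStringer writer = new JSONStringer();
    writer.object();
    writer.key("fileName").value("example.orc");   // hypothetical file name
    try {
      // stand-in for OrcFile.createReader(...) and the metadata walk above
      throw new RuntimeException("simulated corrupt ORC file");
    } catch (Exception e) {
      writer.key("status").value("FAILED");
      System.err.println("Unable to dump data for file: example.orc");
      // the committed code rethrows here; this sketch finishes the object
      // so the partial JSON can be printed for illustration
    }
    writer.endObject();
    System.out.println(writer.toString());  // {"fileName":"example.orc","status":"FAILED"}
  }
}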

http://git-wip-us.apache.org/repos/asf/hive/blob/81d73288/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
index 3d18d30..5b200f1 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
@@ -26,8 +26,6 @@ import java.io.ByteArrayOutputStream;
  import java.io.File;
  import java.io.FileOutputStream;
  import java.io.FileReader;
-import java.io.IOException;
-import java.io.OutputStream;
  import java.io.PrintStream;
  import java.sql.Date;
  import java.sql.Timestamp;
@@ -249,54 +247,6 @@ public class TestFileDump {
      assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
    }

- @Test(expected = IOException.class)
- public void testDataDumpThrowsIOException() throws Exception {
- PrintStream origOut = System.out;
- try {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector
- (AllTypesRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
- 100000, CompressionKind.NONE, 10000, 1000);
- Map<String, String> m = new HashMap<String, String>(2);
- m.put("k1", "v1");
- writer.addRow(new AllTypesRecord(
- true,
- (byte) 10,
- (short) 100,
- 1000,
- 10000L,
- 4.0f,
- 20.0,
- HiveDecimal.create("4.2222"),
- new Timestamp(1416967764000L),
- new Date(1416967764000L),
- "string",
- new HiveChar("hello", 5),
- new HiveVarchar("hello", 10),
- m,
- Arrays.asList(100, 200),
- new AllTypesRecord.Struct(10, "foo")));
-
- writer.close();
-
- OutputStream myOut = new OutputStream() {
- @Override
- public void write(int b) throws IOException {
- throw new IOException();
- }
- };
-
- // replace stdout and run command
- System.setOut(new PrintStream(myOut));
- FileDump.main(new String[]{testFilePath.toString(), "-d"});
- } finally {
- System.setOut(origOut);
- }
- }
-
    // Test that if the fraction of rows that have distinct strings is greater than the configured
    // threshold dictionary encoding is turned off. If dictionary encoding is turned off the length
    // of the dictionary stream for the column will be 0 in the ORC file dump.
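The removed testDataDumpThrowsIOException redirected stdout to a stream that always throws and expected the exception to escape FileDump.main; with errors now caught per file and reported on stderr, that exception no longer propagates, which is presumably why the test is dropped. For reference, the data-dump invocation it exercised looks like the sketch below ("-d" is the flag used in the removed test; the path is hypothetical).

import org.apache.hadoop.hive.ql.io.orc.FileDump;

public class RunFileDump {
  public static void main(String[] args) throws Exception {
    // dump the row data of a single ORC file as JSON records, one per row
    FileDump.main(new String[]{"/tmp/example.orc", "-d"});
  }
}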


  • Prasanthj at Oct 26, 2015 at 7:25 pm
    Repository: hive
    Updated Branches:
       refs/heads/master a91e1471c -> d84e393ed


    HIVE-11523: org.apache.hadoop.hive.ql.io.orc.FileDump should handle errors (Prasanth Jayachandran reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d84e393e
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d84e393e
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d84e393e

    Branch: refs/heads/master
    Commit: d84e393ed66829fe0c8cc87254fef2a329b96163
    Parents: a91e147
    Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Authored: Mon Oct 26 14:18:36 2015 -0500
    Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
    Committed: Mon Oct 26 14:18:36 2015 -0500

    ----------------------------------------------------------------------
      .../apache/hadoop/hive/ql/io/orc/FileDump.java | 193 +++++++++--------
      .../hadoop/hive/ql/io/orc/JsonFileDump.java | 210 ++++++++++---------
      .../hadoop/hive/ql/io/orc/TestFileDump.java | 50 -----
      3 files changed, 213 insertions(+), 240 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/d84e393e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    index a1c5058..9c6538f 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
    @@ -136,9 +136,16 @@ public final class FileDump {
        private static void printData(List<String> files, Configuration conf) throws IOException,
            JSONException {
          for (String file : files) {
    - printJsonData(conf, file);
    - if (files.size() > 1) {
    - System.out.println(Strings.repeat("=", 80) + "\n");
    + try {
    + printJsonData(conf, file);
    + if (files.size() > 1) {
    + System.out.println(Strings.repeat("=", 80) + "\n");
    + }
    + } catch (Exception e) {
    + System.err.println("Unable to dump data for file: " + file);
    + e.printStackTrace();
    + System.err.println(Strings.repeat("=", 80) + "\n");
    + continue;
            }
          }
        }
    @@ -146,103 +153,111 @@ public final class FileDump {
        private static void printMetaData(List<String> files, Configuration conf,
            List<Integer> rowIndexCols, boolean printTimeZone) throws IOException {
          for (String filename : files) {
    - System.out.println("Structure for " + filename);
    - Path path = new Path(filename);
    - Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    - System.out.println("File Version: " + reader.getFileVersion().getName() +
    - " with " + reader.getWriterVersion());
    - RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
    - System.out.println("Rows: " + reader.getNumberOfRows());
    - System.out.println("Compression: " + reader.getCompression());
    - if (reader.getCompression() != CompressionKind.NONE) {
    - System.out.println("Compression size: " + reader.getCompressionSize());
    - }
    - System.out.println("Type: " + reader.getObjectInspector().getTypeName());
    - System.out.println("\nStripe Statistics:");
    - List<StripeStatistics> stripeStats = reader.getStripeStatistics();
    - for (int n = 0; n < stripeStats.size(); n++) {
    - System.out.println(" Stripe " + (n + 1) + ":");
    - StripeStatistics ss = stripeStats.get(n);
    - for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
    - System.out.println(" Column " + i + ": " +
    - ss.getColumnStatistics()[i].toString());
    + try {
    + Path path = new Path(filename);
    + Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    + System.out.println("Structure for " + filename);
    + System.out.println("File Version: " + reader.getFileVersion().getName() +
    + " with " + reader.getWriterVersion());
    + RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
    + System.out.println("Rows: " + reader.getNumberOfRows());
    + System.out.println("Compression: " + reader.getCompression());
    + if (reader.getCompression() != CompressionKind.NONE) {
    + System.out.println("Compression size: " + reader.getCompressionSize());
              }
    - }
    - ColumnStatistics[] stats = reader.getStatistics();
    - int colCount = stats.length;
    - System.out.println("\nFile Statistics:");
    - for (int i = 0; i < stats.length; ++i) {
    - System.out.println(" Column " + i + ": " + stats[i].toString());
    - }
    - System.out.println("\nStripes:");
    - int stripeIx = -1;
    - for (StripeInformation stripe : reader.getStripes()) {
    - ++stripeIx;
    - long stripeStart = stripe.getOffset();
    - OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
    - if (printTimeZone) {
    - String tz = footer.getWriterTimezone();
    - if (tz == null || tz.isEmpty()) {
    - tz = UNKNOWN;
    + System.out.println("Type: " + reader.getObjectInspector().getTypeName());
    + System.out.println("\nStripe Statistics:");
    + List<StripeStatistics> stripeStats = reader.getStripeStatistics();
    + for (int n = 0; n < stripeStats.size(); n++) {
    + System.out.println(" Stripe " + (n + 1) + ":");
    + StripeStatistics ss = stripeStats.get(n);
    + for (int i = 0; i < ss.getColumnStatistics().length; ++i) {
    + System.out.println(" Column " + i + ": " +
    + ss.getColumnStatistics()[i].toString());
                }
    - System.out.println(" Stripe: " + stripe.toString() + " timezone: " + tz);
    - } else {
    - System.out.println(" Stripe: " + stripe.toString());
              }
    - long sectionStart = stripeStart;
    - for (OrcProto.Stream section : footer.getStreamsList()) {
    - String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
    - System.out.println(" Stream: column " + section.getColumn() +
    - " section " + kind + " start: " + sectionStart +
    - " length " + section.getLength());
    - sectionStart += section.getLength();
    + ColumnStatistics[] stats = reader.getStatistics();
    + int colCount = stats.length;
    + System.out.println("\nFile Statistics:");
    + for (int i = 0; i < stats.length; ++i) {
    + System.out.println(" Column " + i + ": " + stats[i].toString());
              }
    - for (int i = 0; i < footer.getColumnsCount(); ++i) {
    - OrcProto.ColumnEncoding encoding = footer.getColumns(i);
    - StringBuilder buf = new StringBuilder();
    - buf.append(" Encoding column ");
    - buf.append(i);
    - buf.append(": ");
    - buf.append(encoding.getKind());
    - if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
    - encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
    - buf.append("[");
    - buf.append(encoding.getDictionarySize());
    - buf.append("]");
    + System.out.println("\nStripes:");
    + int stripeIx = -1;
    + for (StripeInformation stripe : reader.getStripes()) {
    + ++stripeIx;
    + long stripeStart = stripe.getOffset();
    + OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
    + if (printTimeZone) {
    + String tz = footer.getWriterTimezone();
    + if (tz == null || tz.isEmpty()) {
    + tz = UNKNOWN;
    + }
    + System.out.println(" Stripe: " + stripe.toString() + " timezone: " + tz);
    + } else {
    + System.out.println(" Stripe: " + stripe.toString());
                }
    - System.out.println(buf);
    - }
    - if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
    - // include the columns that are specified, only if the columns are included, bloom filter
    - // will be read
    - boolean[] sargColumns = new boolean[colCount];
    - for (int colIdx : rowIndexCols) {
    - sargColumns[colIdx] = true;
    + long sectionStart = stripeStart;
    + for (OrcProto.Stream section : footer.getStreamsList()) {
    + String kind = section.hasKind() ? section.getKind().name() : UNKNOWN;
    + System.out.println(" Stream: column " + section.getColumn() +
    + " section " + kind + " start: " + sectionStart +
    + " length " + section.getLength());
    + sectionStart += section.getLength();
                }
    - RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, null, null, sargColumns);
    - for (int col : rowIndexCols) {
    + for (int i = 0; i < footer.getColumnsCount(); ++i) {
    + OrcProto.ColumnEncoding encoding = footer.getColumns(i);
                  StringBuilder buf = new StringBuilder();
    - String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
    - buf.append(rowIdxString);
    - String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
    - buf.append(bloomFilString);
    + buf.append(" Encoding column ");
    + buf.append(i);
    + buf.append(": ");
    + buf.append(encoding.getKind());
    + if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
    + encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
    + buf.append("[");
    + buf.append(encoding.getDictionarySize());
    + buf.append("]");
    + }
                  System.out.println(buf);
                }
    + if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
    + // include the columns that are specified, only if the columns are included, bloom filter
    + // will be read
    + boolean[] sargColumns = new boolean[colCount];
    + for (int colIdx : rowIndexCols) {
    + sargColumns[colIdx] = true;
    + }
    + RecordReaderImpl.Index indices = rows
    + .readRowIndex(stripeIx, null, null, null, sargColumns);
    + for (int col : rowIndexCols) {
    + StringBuilder buf = new StringBuilder();
    + String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
    + buf.append(rowIdxString);
    + String bloomFilString = getFormattedBloomFilters(col, indices.getBloomFilterIndex());
    + buf.append(bloomFilString);
    + System.out.println(buf);
    + }
    + }
              }
    - }

    - FileSystem fs = path.getFileSystem(conf);
    - long fileLen = fs.getContentSummary(path).getLength();
    - long paddedBytes = getTotalPaddingSize(reader);
    - // empty ORC file is ~45 bytes. Assumption here is file length always >0
    - double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
    - DecimalFormat format = new DecimalFormat("##.##");
    - System.out.println("\nFile length: " + fileLen + " bytes");
    - System.out.println("Padding length: " + paddedBytes + " bytes");
    - System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
    - rows.close();
    - if (files.size() > 1) {
    - System.out.println(Strings.repeat("=", 80) + "\n");
    + FileSystem fs = path.getFileSystem(conf);
    + long fileLen = fs.getContentSummary(path).getLength();
    + long paddedBytes = getTotalPaddingSize(reader);
    + // empty ORC file is ~45 bytes. Assumption here is file length always >0
    + double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
    + DecimalFormat format = new DecimalFormat("##.##");
    + System.out.println("\nFile length: " + fileLen + " bytes");
    + System.out.println("Padding length: " + paddedBytes + " bytes");
    + System.out.println("Padding ratio: " + format.format(percentPadding) + "%");
    + rows.close();
    + if (files.size() > 1) {
    + System.out.println(Strings.repeat("=", 80) + "\n");
    + }
    + } catch (Exception e) {
    + System.err.println("Unable to dump metadata for file: " + filename);
    + e.printStackTrace();
    + System.err.println(Strings.repeat("=", 80) + "\n");
    + continue;
            }
          }
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/d84e393e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
    index a438855..02e01b4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
    @@ -45,130 +45,138 @@ public class JsonFileDump {
            writer.object();
          }
          for (String filename : files) {
    - if (multiFile) {
    - writer.object();
    - }
    - writer.key("fileName").value(filename);
    - Path path = new Path(filename);
    - Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    - writer.key("fileVersion").value(reader.getFileVersion().getName());
    - writer.key("writerVersion").value(reader.getWriterVersion());
    - RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
    - writer.key("numberOfRows").value(reader.getNumberOfRows());
    - writer.key("compression").value(reader.getCompression());
    - if (reader.getCompression() != CompressionKind.NONE) {
    - writer.key("compressionBufferSize").value(reader.getCompressionSize());
    - }
    - writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
    - writer.key("schema").array();
    - writeSchema(writer, reader.getTypes());
    - writer.endArray();
    -
    - writer.key("stripeStatistics").array();
    - List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
    - for (int n = 0; n < stripeStatistics.size(); n++) {
    - writer.object();
    - writer.key("stripeNumber").value(n + 1);
    - StripeStatistics ss = stripeStatistics.get(n);
    - writer.key("columnStatistics").array();
    - for (int i = 0; i < ss.getColumnStatistics().length; i++) {
    + try {
    + if (multiFile) {
                writer.object();
    - writer.key("columnId").value(i);
    - writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
    - writer.endObject();
              }
    - writer.endArray();
    - writer.endObject();
    - }
    - writer.endArray();
    -
    - ColumnStatistics[] stats = reader.getStatistics();
    - int colCount = stats.length;
    - writer.key("fileStatistics").array();
    - for (int i = 0; i < stats.length; ++i) {
    - writer.object();
    - writer.key("columnId").value(i);
    - writeColumnStatistics(writer, stats[i]);
    - writer.endObject();
    - }
    - writer.endArray();
    -
    - writer.key("stripes").array();
    - int stripeIx = -1;
    - for (StripeInformation stripe : reader.getStripes()) {
    - ++stripeIx;
    - long stripeStart = stripe.getOffset();
    - OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
    - writer.object(); // start of stripe information
    - writer.key("stripeNumber").value(stripeIx + 1);
    - writer.key("stripeInformation");
    - writeStripeInformation(writer, stripe);
    - if (printTimeZone) {
    - writer.key("writerTimezone").value(
    - footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
    + writer.key("fileName").value(filename);
    + Path path = new Path(filename);
    + Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    + writer.key("fileVersion").value(reader.getFileVersion().getName());
    + writer.key("writerVersion").value(reader.getWriterVersion());
    + RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
    + writer.key("numberOfRows").value(reader.getNumberOfRows());
    + writer.key("compression").value(reader.getCompression());
    + if (reader.getCompression() != CompressionKind.NONE) {
    + writer.key("compressionBufferSize").value(reader.getCompressionSize());
              }
    - long sectionStart = stripeStart;
    + writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
    + writer.key("schema").array();
    + writeSchema(writer, reader.getTypes());
    + writer.endArray();

    - writer.key("streams").array();
    - for (OrcProto.Stream section : footer.getStreamsList()) {
    + writer.key("stripeStatistics").array();
    + List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
    + for (int n = 0; n < stripeStatistics.size(); n++) {
                writer.object();
    - String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
    - writer.key("columnId").value(section.getColumn());
    - writer.key("section").value(kind);
    - writer.key("startOffset").value(sectionStart);
    - writer.key("length").value(section.getLength());
    - sectionStart += section.getLength();
    + writer.key("stripeNumber").value(n + 1);
    + StripeStatistics ss = stripeStatistics.get(n);
    + writer.key("columnStatistics").array();
    + for (int i = 0; i < ss.getColumnStatistics().length; i++) {
    + writer.object();
    + writer.key("columnId").value(i);
    + writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
    + writer.endObject();
    + }
    + writer.endArray();
                writer.endObject();
              }
              writer.endArray();

    - writer.key("encodings").array();
    - for (int i = 0; i < footer.getColumnsCount(); ++i) {
    + ColumnStatistics[] stats = reader.getStatistics();
    + int colCount = stats.length;
    + writer.key("fileStatistics").array();
    + for (int i = 0; i < stats.length; ++i) {
                writer.object();
    - OrcProto.ColumnEncoding encoding = footer.getColumns(i);
                writer.key("columnId").value(i);
    - writer.key("kind").value(encoding.getKind());
    - if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
    - encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
    - writer.key("dictionarySize").value(encoding.getDictionarySize());
    - }
    + writeColumnStatistics(writer, stats[i]);
                writer.endObject();
              }
              writer.endArray();

    - if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
    - // include the columns that are specified, only if the columns are included, bloom filter
    - // will be read
    - boolean[] sargColumns = new boolean[colCount];
    - for (int colIdx : rowIndexCols) {
    - sargColumns[colIdx] = true;
    + writer.key("stripes").array();
    + int stripeIx = -1;
    + for (StripeInformation stripe : reader.getStripes()) {
    + ++stripeIx;
    + long stripeStart = stripe.getOffset();
    + OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
    + writer.object(); // start of stripe information
    + writer.key("stripeNumber").value(stripeIx + 1);
    + writer.key("stripeInformation");
    + writeStripeInformation(writer, stripe);
    + if (printTimeZone) {
    + writer.key("writerTimezone").value(
    + footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
                }
    - RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
    - writer.key("indexes").array();
    - for (int col : rowIndexCols) {
    + long sectionStart = stripeStart;
    +
    + writer.key("streams").array();
    + for (OrcProto.Stream section : footer.getStreamsList()) {
                  writer.object();
    - writer.key("columnId").value(col);
    - writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
    - writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
    + String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
    + writer.key("columnId").value(section.getColumn());
    + writer.key("section").value(kind);
    + writer.key("startOffset").value(sectionStart);
    + writer.key("length").value(section.getLength());
    + sectionStart += section.getLength();
                  writer.endObject();
                }
                writer.endArray();
    +
    + writer.key("encodings").array();
    + for (int i = 0; i < footer.getColumnsCount(); ++i) {
    + writer.object();
    + OrcProto.ColumnEncoding encoding = footer.getColumns(i);
    + writer.key("columnId").value(i);
    + writer.key("kind").value(encoding.getKind());
    + if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
    + encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
    + writer.key("dictionarySize").value(encoding.getDictionarySize());
    + }
    + writer.endObject();
    + }
    + writer.endArray();
    +
    + if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
    + // include the columns that are specified, only if the columns are included, bloom filter
    + // will be read
    + boolean[] sargColumns = new boolean[colCount];
    + for (int colIdx : rowIndexCols) {
    + sargColumns[colIdx] = true;
    + }
    + RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
    + writer.key("indexes").array();
    + for (int col : rowIndexCols) {
    + writer.object();
    + writer.key("columnId").value(col);
    + writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
    + writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
    + writer.endObject();
    + }
    + writer.endArray();
    + }
    + writer.endObject(); // end of stripe information
              }
    - writer.endObject(); // end of stripe information
    - }
    - writer.endArray();
    + writer.endArray();

    - FileSystem fs = path.getFileSystem(conf);
    - long fileLen = fs.getContentSummary(path).getLength();
    - long paddedBytes = FileDump.getTotalPaddingSize(reader);
    - // empty ORC file is ~45 bytes. Assumption here is file length always >0
    - double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
    - writer.key("fileLength").value(fileLen);
    - writer.key("paddingLength").value(paddedBytes);
    - writer.key("paddingRatio").value(percentPadding);
    - rows.close();
    + FileSystem fs = path.getFileSystem(conf);
    + long fileLen = fs.getContentSummary(path).getLength();
    + long paddedBytes = FileDump.getTotalPaddingSize(reader);
    + // empty ORC file is ~45 bytes. Assumption here is file length always >0
    + double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
    + writer.key("fileLength").value(fileLen);
    + writer.key("paddingLength").value(paddedBytes);
    + writer.key("paddingRatio").value(percentPadding);
    + writer.key("status").value("OK");
    + rows.close();

    - writer.endObject();
    + writer.endObject();
    + } catch (Exception e) {
    + writer.key("status").value("FAILED");
    + System.err.println("Unable to dump data for file: " + filename);
    + e.printStackTrace();
    + throw e;
    + }
          }
          if (multiFile) {
            writer.endArray();

    http://git-wip-us.apache.org/repos/asf/hive/blob/d84e393e/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    index 68d503e..40674ea 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    @@ -26,8 +26,6 @@ import java.io.ByteArrayOutputStream;
      import java.io.File;
      import java.io.FileOutputStream;
      import java.io.FileReader;
    -import java.io.IOException;
    -import java.io.OutputStream;
      import java.io.PrintStream;
      import java.sql.Date;
      import java.sql.Timestamp;
    @@ -252,54 +250,6 @@ public class TestFileDump {
          assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world \",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
        }

    - @Test(expected = IOException.class)
    - public void testDataDumpThrowsIOException() throws Exception {
    - PrintStream origOut = System.out;
    - try {
    - ObjectInspector inspector;
    - synchronized (TestOrcFile.class) {
    - inspector = ObjectInspectorFactory.getReflectionObjectInspector
    - (AllTypesRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    - }
    - Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
    - 100000, CompressionKind.NONE, 10000, 1000);
    - Map<String, String> m = new HashMap<String, String>(2);
    - m.put("k1", "v1");
    - writer.addRow(new AllTypesRecord(
    - true,
    - (byte) 10,
    - (short) 100,
    - 1000,
    - 10000L,
    - 4.0f,
    - 20.0,
    - HiveDecimal.create("4.2222"),
    - new Timestamp(1416967764000L),
    - new Date(1416967764000L),
    - "string",
    - new HiveChar("hello", 5),
    - new HiveVarchar("hello", 10),
    - m,
    - Arrays.asList(100, 200),
    - new AllTypesRecord.Struct(10, "foo")));
    -
    - writer.close();
    -
    - OutputStream myOut = new OutputStream() {
    - @Override
    - public void write(int b) throws IOException {
    - throw new IOException();
    - }
    - };
    -
    - // replace stdout and run command
    - System.setOut(new PrintStream(myOut));
    - FileDump.main(new String[]{testFilePath.toString(), "-d"});
    - } finally {
    - System.setOut(origOut);
    - }
    - }
    -
        // Test that if the fraction of rows that have distinct strings is greater than the configured
        // threshold dictionary encoding is turned off. If dictionary encoding is turned off the length
        // of the dictionary stream for the column will be 0 in the ORC file dump.
