FAQ
Author: omalley
Date: Wed Apr 9 19:46:12 2014
New Revision: 1586104

URL: http://svn.apache.org/r1586104
Log:
HIVE-6818. Fix array out of bounds when ORC is used with ACID and predicate pushdown. (omalley)

Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1586104&r1=1586103&r2=1586104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Apr 9 19:46:12 2014
@@ -106,6 +106,7 @@ public class OrcInputFormat implements
        SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE");
    static final String MAX_SPLIT_SIZE =
        SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE");
+ static final String SARG_PUSHDOWN = "sarg.pushdown";

    private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
    private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
@@ -268,21 +269,28 @@ public class OrcInputFormat implements
                                  boolean isOriginal) {
      int rootColumn = getRootColumn(isOriginal);
      String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ String sargPushdown = conf.get(SARG_PUSHDOWN);
      String columnNamesString =
          conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
- if (serializedPushdown == null || columnNamesString == null) {
+ if ((sargPushdown == null && serializedPushdown == null)
+ || columnNamesString == null) {
        LOG.debug("No ORC pushdown predicate");
        options.searchArgument(null, null);
      } else {
- SearchArgument sarg = SearchArgument.FACTORY.create
- (Utilities.deserializeExpression(serializedPushdown));
+ SearchArgument sarg;
+ if (serializedPushdown != null) {
+ sarg = SearchArgument.FACTORY.create
+ (Utilities.deserializeExpression(serializedPushdown));
+ } else {
+ sarg = SearchArgument.FACTORY.create(sargPushdown);
+ }
        LOG.info("ORC pushdown predicate: " + sarg);
        String[] neededColumnNames = columnNamesString.split(",");
        String[] columnNames = new String[types.size() - rootColumn];
        boolean[] includedColumns = options.getInclude();
        int i = 0;
        for(int columnId: types.get(rootColumn).getSubtypesList()) {
- if (includedColumns == null || includedColumns[columnId]) {
+ if (includedColumns == null || includedColumns[columnId - rootColumn]) {
            // this is guaranteed to be positive because types only have children
            // ids greater than their own id.
            columnNames[columnId - rootColumn] = neededColumnNames[i++];

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1586104&r1=1586103&r2=1586104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Wed Apr 9 19:46:12 2014
@@ -166,6 +166,12 @@ public interface SearchArgument {
    public TruthValue evaluate(TruthValue[] leaves);

    /**
+ * Serialize the SARG as a Kryo object and return the base64 string.
+ * @return the serialized SARG
+ */
+ public String toKryo();
+
+ /**
     * A factory for creating SearchArguments. Java doesn't allow static methods
     * in interfaces. *DOH*
     */
@@ -177,6 +183,10 @@ public interface SearchArgument {
      public Builder newBuilder() {
        return SearchArgumentImpl.newBuilder();
      }
+
+ public SearchArgument create(String kryo) {
+ return SearchArgumentImpl.fromKryo(kryo);
+ }
    }

    /**

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1586104&r1=1586103&r2=1586104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Wed Apr 9 19:46:12 2014
@@ -26,6 +26,10 @@ import java.util.HashMap;
  import java.util.List;
  import java.util.Map;

+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+import org.apache.commons.codec.binary.Base64;
  import org.apache.commons.lang.StringUtils;
  import org.apache.hadoop.hive.common.type.HiveChar;
  import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -65,6 +69,14 @@ final class SearchArgumentImpl implement
      private final Object literal;
      private final List<Object> literalList;

+ PredicateLeafImpl() {
+ operator = null;
+ type = null;
+ columnName = null;
+ literal = null;
+ literalList = null;
+ }
+
      PredicateLeafImpl(Operator operator,
                        Type type,
                        String columnName,
@@ -166,6 +178,13 @@ final class SearchArgumentImpl implement
      private final int leaf;
      private final TruthValue constant;

+ ExpressionTree() {
+ operator = null;
+ children = null;
+ leaf = 0;
+ constant = null;
+ }
+
      ExpressionTree(Operator op, ExpressionTree... kids) {
        operator = op;
        children = new ArrayList<ExpressionTree>();
@@ -818,6 +837,11 @@ final class SearchArgumentImpl implement
      }
    }

+ SearchArgumentImpl() {
+ leaves = null;
+ expression = null;
+ }
+
    SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
      this.expression = expression;
      this.leaves = leaves;
@@ -852,6 +876,18 @@ final class SearchArgumentImpl implement
      return buffer.toString();
    }

+ public String toKryo() {
+ Output out = new Output(4 * 1024, 10 * 1024 * 1024);
+ new Kryo().writeObject(out, this);
+ out.close();
+ return Base64.encodeBase64String(out.toBytes());
+ }
+
+ static SearchArgument fromKryo(String value) {
+ Input input = new Input(Base64.decodeBase64(value));
+ return new Kryo().readObject(input, SearchArgumentImpl.class);
+ }
+
    private static class BuilderImpl implements Builder {
      private final Deque<ExpressionTree> currentTree =
          new ArrayDeque<ExpressionTree>();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java?rev=1586104&r1=1586103&r2=1586104&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java Wed Apr 9 19:46:12 2014
@@ -71,7 +71,7 @@ public class ExprNodeGenericFuncDesc ext
    //Is this an expression that should perform a comparison for sorted searches
    private boolean isSortedExpr;

- public ExprNodeGenericFuncDesc() {
+ public ExprNodeGenericFuncDesc() {;
    }

    /* If the function has an explicit name like func(args) then call a

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1586104&r1=1586103&r2=1586104&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Wed Apr 9 19:46:12 2014
@@ -59,6 +59,8 @@ import org.apache.hadoop.hive.ql.io.FSRe
  import org.apache.hadoop.hive.ql.io.HiveInputFormat;
  import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
  import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
  import org.apache.hadoop.hive.ql.plan.MapWork;
  import org.apache.hadoop.hive.ql.plan.PartitionDesc;
  import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -1287,4 +1289,49 @@ public class TestInputOutputFormat {
            ioe.getMessage());
      }
    }
+
+ @Test
+ public void testSetSearchArgument() throws Exception {
+ Reader.Options options = new Reader.Options();
+ List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
+ OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
+ builder.setKind(OrcProto.Type.Kind.STRUCT)
+ .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid",
+ "row"))
+ .addAllSubtypes(Arrays.asList(1,2,3,4,5,6));
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.INT);
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.STRUCT)
+ .addAllFieldNames(Arrays.asList("url", "purchase", "cost", "store"))
+ .addAllSubtypes(Arrays.asList(7, 8, 9, 10));
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.STRING);
+ types.add(builder.build());
+ builder.clear().setKind(OrcProto.Type.Kind.INT);
+ types.add(builder.build());
+ types.add(builder.build());
+ types.add(builder.build());
+ SearchArgument isNull = SearchArgument.FACTORY.newBuilder()
+ .startAnd().isNull("cost").end().build();
+ conf.set(OrcInputFormat.SARG_PUSHDOWN, isNull.toKryo());
+ conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
+ "url,cost");
+ options.include(new boolean[]{true, true, false, true, false});
+ OrcInputFormat.setSearchArgument(options, types, conf, false);
+ String[] colNames = options.getColumnNames();
+ assertEquals(null, colNames[0]);
+ assertEquals("url", colNames[1]);
+ assertEquals(null, colNames[2]);
+ assertEquals("cost", colNames[3]);
+ assertEquals(null, colNames[4]);
+ SearchArgument arg = options.getSearchArgument();
+ List<PredicateLeaf> leaves = arg.getLeaves();
+ assertEquals("cost", leaves.get(0).getColumnName());
+ assertEquals(PredicateLeaf.Operator.IS_NULL, leaves.get(0).getOperator());
+ }
  }

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Apr 9, '14 at 7:46p
active: Apr 9, '14 at 7:46p
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Omalley: 1 post

People

Translate

site design / logo © 2021 Grokbase