FAQ
Author: ehans
Date: Fri Jan 17 02:08:46 2014
New Revision: 1558987

URL: http://svn.apache.org/r1558987
Log:
HIVE-5595: Implement vectorized SMB JOIN (Remus Rusanu via Eric Hanson)

Added:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
     hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
     hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Fri Jan 17 02:08:46 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.ve
  import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
  import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator;
  import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -121,6 +122,7 @@ public final class OperatorFactory {
      vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
      vectorOpvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, VectorGroupByOperator.class));
      vectorOpvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, VectorMapJoinOperator.class));
+ vectorOpvec.add(new OpTuple<SMBJoinDesc>(SMBJoinDesc.class, VectorSMBMapJoinOperator.class));
      vectorOpvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class,
          VectorReduceSinkOperator.class));
      vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Fri Jan 17 02:08:46 2014
@@ -226,6 +226,11 @@ public class SMBMapJoinOperator extends
    public void cleanUpInputFileChangedOp() throws HiveException {
      inputFileChanged = true;
    }
+
+ protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
+ return JoinUtil.computeKeys(row, joinKeys[alias],
+ joinKeysObjectInspectors[alias]);
+ }

    @Override
    public void processOp(Object row, int tag) throws HiveException {
@@ -260,8 +265,8 @@ public class SMBMapJoinOperator extends
      byte alias = (byte) tag;

      // compute keys and values as StandardObjects
- ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys[alias],
- joinKeysObjectInspectors[alias]);
+ List<Object> key = smbJoinComputeKeys(row, alias);
+
      List<Object> value = getFilteredValue(alias, row);


@@ -495,7 +500,7 @@ public class SMBMapJoinOperator extends
      return smallestOne == null ? null : result;
    }

- private boolean processKey(byte alias, ArrayList<Object> key)
+ private boolean processKey(byte alias, List<Object> key)
        throws HiveException {
      List<Object> keyWritable = keyWritables[alias];
      if (keyWritable == null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Jan 17 02:08:46 2014
@@ -3192,10 +3192,24 @@ public final class Utilities {
      }
    }

- public static void clearWorkMap() {
+ /**
+ * Returns true if a plan is both configured for vectorized execution
+ * and vectorization is allowed. The plan may be configured for vectorization
+ * but vectorization disallowed, e.g. for FetchOperator execution.
+ */
+ public static boolean isVectorMode(Configuration conf) {
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) &&
+ Utilities.getPlanPath(conf) != null && Utilities
+ .getMapRedWork(conf).getMapWork().getVectorMode()) {
+ return true;
+ }
+ return false;
+ }
+
+ public static void clearWorkMap() {
      gWorkMap.clear();
    }
-
+
    /**
     * Create a temp dir in specified baseDir
     * This can go away once hive moves to support only JDK 7

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java Fri Jan 17 02:08:46 2014
@@ -0,0 +1,313 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * VectorSMBMapJoinOperator.
+ * Implements the vectorized SMB join operator. The implementation relies on the row-mode SMB join operator.
+ * It accepts a vectorized batch input from the big table and iterates over the batch, calling the parent row-mode
+ * implementation for each row in the batch.
+ */
+public class VectorSMBMapJoinOperator extends SMBMapJoinOperator implements VectorizationContextRegion {
+
+ private static final Log LOG = LogFactory.getLog(
+ VectorSMBMapJoinOperator.class.getName());
+
+ private static final long serialVersionUID = 1L;
+
+ private int tagLen;
+
+ private transient VectorizedRowBatch outputBatch;
+ private transient VectorizationContext vOutContext = null;
+ private transient VectorizedRowBatchCtx vrbCtx = null;
+
+ private String fileKey;
+
+ private VectorExpression[] bigTableValueExpressions;
+
+ private VectorExpression[] bigTableFilterExpressions;
+
+ private VectorExpression[] keyExpressions;
+
+ private VectorExpressionWriter[] keyOutputWriters;
+
+ private transient VectorHashKeyWrapperBatch keyWrapperBatch;
+
+ private transient Map<ObjectInspector, VectorColumnAssign[]> outputVectorAssigners;
+
+ private transient int batchIndex = -1;
+
+ private transient VectorHashKeyWrapper[] keyValues;
+
+ private transient SMBJoinKeyEvaluator keyEvaluator;
+
+ private transient VectorExpressionWriter[] valueWriters;
+
+ private interface SMBJoinKeyEvaluator {
+ List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException;
+}
+
+ public VectorSMBMapJoinOperator() {
+ super();
+ }
+
+ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf)
+ throws HiveException {
+ this();
+ SMBJoinDesc desc = (SMBJoinDesc) conf;
+ this.conf = desc;
+
+ order = desc.getTagOrder();
+ numAliases = desc.getExprs().size();
+ posBigTable = (byte) desc.getPosBigTable();
+ filterMaps = desc.getFilterMap();
+ tagLen = desc.getTagLength();
+ noOuterJoin = desc.isNoOuterJoin();
+
+ // Must obtain vectorized equivalents for filter and value expressions
+
+ Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
+ bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable),
+ VectorExpressionDescriptor.Mode.FILTER);
+
+ List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
+ keyExpressions = vContext.getVectorExpressions(keyDesc);
+ keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
+
+ Map<Byte, List<ExprNodeDesc>> exprs = desc.getExprs();
+ bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
+
+ // Vectorized join operators need to create a new vectorization region for child operators.
+
+ List<String> outColNames = desc.getOutputColumnNames();
+
+ Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
+
+ int outColIndex = 0;
+ for(String outCol: outColNames) {
+ mapOutCols.put(outCol, outColIndex++);
+ }
+
+ vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+ vOutContext.setFileKey(vContext.getFileKey() + "/SMB_JOIN_" + desc.getBigTableAlias());
+ this.fileKey = vOutContext.getFileKey();
+ }
+
+ @Override
+ protected List<Object> smbJoinComputeKeys(Object row, byte alias) throws HiveException {
+ if (alias == this.posBigTable) {
+ VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+ return keyEvaluator.evaluate(keyValues[batchIndex]);
+ } else {
+ return super.smbJoinComputeKeys(row, alias);
+ }
+ }
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ super.initializeOp(hconf);
+
+ vrbCtx = new VectorizedRowBatchCtx();
+ vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+
+ outputBatch = vrbCtx.createVectorizedRowBatch();
+
+ keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
+
+ outputVectorAssigners = new HashMap<ObjectInspector, VectorColumnAssign[]>();
+
+ // This key evaluator translates from the vectorized VectorHashKeyWrapper format
+ // into the row-mode MapJoinKey
+ keyEvaluator = new SMBJoinKeyEvaluator() {
+ private List<Object> key;
+
+ public SMBJoinKeyEvaluator init() {
+ key = new ArrayList<Object>();
+ for(int i = 0; i < keyExpressions.length; ++i) {
+ key.add(null);
+ }
+ return this;
+ }
+
+ @Override
+ public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
+ for(int i = 0; i < keyExpressions.length; ++i) {
+ key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
+ }
+ return key;
+ };
+ }.init();
+
+ Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
+ List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
+
+ // We're hijacking the big table evaluators and replacing them with our own custom ones
+ // which are going to return values from the input batch vector expressions
+ List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
+
+ VectorExpressionWriterFactory.processVectorExpressions(
+ bigTableExpressions,
+ new VectorExpressionWriterFactory.ListOIDClosure() {
+
+ @Override
+ public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
+ valueWriters = writers;
+ joinValuesObjectInspectors[posBigTable] = oids;
+ }
+ });
+
+ for(int i=0; i<bigTableExpressions.size(); ++i) {
+ ExprNodeDesc desc = bigTableExpressions.get(i);
+ VectorExpression vectorExpr = bigTableValueExpressions[i];
+
+ // This is a vectorized aware evaluator
+ ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc) {
+ int columnIndex;;
+ int writerIndex;
+
+ public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
+ this.columnIndex = columnIndex;
+ this.writerIndex = writerIndex;
+ return this;
+ }
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
+ throw new HiveException("should never reach here");
+ }
+
+ @Override
+ protected Object _evaluate(Object row, int version) throws HiveException {
+ VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+ int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
+ return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
+ }
+ }.initVectorExpr(vectorExpr.getOutputColumn(), i);
+ vectorNodeEvaluators.add(eval);
+ }
+ // Now replace the old evaluators with our own
+ joinValues[posBigTable] = vectorNodeEvaluators;
+
+ }
+
+ @Override
+ public void processOp(Object row, int tag) throws HiveException {
+ byte alias = (byte) tag;
+
+ if (alias != this.posBigTable) {
+ super.processOp(row, tag);
+ } else {
+
+ VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
+
+ if (null != bigTableFilterExpressions) {
+ for(VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(inBatch);
+ }
+ }
+
+ if (null != bigTableValueExpressions) {
+ for(VectorExpression ve : bigTableValueExpressions) {
+ ve.evaluate(inBatch);
+ }
+ }
+
+ keyWrapperBatch.evaluateBatch(inBatch);
+ keyValues = keyWrapperBatch.getVectorHashKeyWrappers();
+
+ // This implementation of vectorized JOIN is delegating all the work
+ // to the row-mode implementation by hijacking the big table node evaluators
+ // and calling the row-mode join processOp for each row in the input batch.
+ // Since the JOIN operator is not fully vectorized anyway at the moment
+ // (due to the use of row-mode small-tables) this is a reasonable trade-off.
+ //
+ for(batchIndex=0; batchIndex < inBatch.size; ++batchIndex ) {
+ super.processOp(row, tag);
+ }
+
+ // Set these two to invalid values so any attempt to use them
+ // outside the inner loop results in NPE/OutOfBounds errors
+ batchIndex = -1;
+ keyValues = null;
+ }
+ }
+
+ @Override
+ public void closeOp(boolean aborted) throws HiveException {
+ super.closeOp(aborted);
+ if (!aborted && 0 < outputBatch.size) {
+ flushOutput();
+ }
+ }
+
+ @Override
+ protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
+ Object[] values = (Object[]) row;
+ VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI);
+ if (null == vcas) {
+ Map<String, Map<String, Integer>> allColumnMaps = Utilities.
+ getMapRedWork(hconf).getMapWork().getScratchColumnMap();
+ Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
+ vcas = VectorColumnAssignFactory.buildAssigners(
+ outputBatch, outputOI, columnMap, conf.getOutputColumnNames());
+ outputVectorAssigners.put(outputOI, vcas);
+ }
+ for (int i = 0; i < values.length; ++i) {
+ vcas[i].assignObjectValue(values[i], outputBatch.size);
+ }
+ ++outputBatch.size;
+ if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
+ flushOutput();
+ }
+ }
+
+ private void flushOutput() throws HiveException {
+ forward(outputBatch, null);
+ outputBatch.reset();
+ }
+
+ @Override
+ public VectorizationContext getOuputVectorizationContext() {
+ return vOutContext;
+ }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Fri Jan 17 02:08:46 2014
@@ -299,11 +299,7 @@ public class OrcInputFormat implements
    }

    private boolean isVectorMode(Configuration conf) {
- if (Utilities.getPlanPath(conf) != null && Utilities
- .getMapRedWork(conf).getMapWork().getVectorMode()) {
- return true;
- }
- return false;
+ return Utilities.isVectorMode(conf);
    }

    /**

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 17 02:08:46 2014
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.Ma
  import org.apache.hadoop.hive.ql.exec.Operator;
  import org.apache.hadoop.hive.ql.exec.OperatorFactory;
  import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
  import org.apache.hadoop.hive.ql.exec.SelectOperator;
  import org.apache.hadoop.hive.ql.exec.TableScanOperator;
  import org.apache.hadoop.hive.ql.exec.Task;
@@ -78,6 +79,7 @@ import org.apache.hadoop.hive.ql.plan.Ma
  import org.apache.hadoop.hive.ql.plan.MapWork;
  import org.apache.hadoop.hive.ql.plan.OperatorDesc;
  import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
  import org.apache.hadoop.hive.ql.plan.TableScanDesc;
  import org.apache.hadoop.hive.ql.plan.TezWork;
  import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -555,6 +557,8 @@ public class Vectorizer implements Physi
        case MAPJOIN:
          if (op instanceof MapJoinOperator) {
            ret = validateMapJoinOperator((MapJoinOperator) op);
+ } else if (op instanceof SMBMapJoinOperator) {
+ ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
          }
          break;
        case GROUPBY:
@@ -583,6 +587,12 @@ public class Vectorizer implements Physi
      return ret;
    }

+ private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
+ SMBJoinDesc desc = op.getConf();
+ // Validation is the same as for map join, since the 'small' tables are not vectorized
+ return validateMapJoinDesc(desc);
+ }
+
    private boolean validateTableScanOperator(TableScanOperator op) {
      TableScanDesc desc = op.getConf();
      return !desc.isGatherStats();
@@ -590,6 +600,10 @@ public class Vectorizer implements Physi

    private boolean validateMapJoinOperator(MapJoinOperator op) {
      MapJoinDesc desc = op.getConf();
+ return validateMapJoinDesc(desc);
+ }
+
+ private boolean validateMapJoinDesc(MapJoinDesc desc) {
      byte posBigTable = (byte) desc.getPosBigTable();
      List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
      List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1558987&r1=1558986&r2=1558987&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Fri Jan 17 02:08:46 2014
@@ -146,39 +146,63 @@ public class TestVectorizer {
      Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER));
      Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION));
    }
+
+ /**
+ * prepareAbstractMapJoin prepares a join operator descriptor, used as helper by SMB and Map join tests.
+ */
+ private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> mop, MapJoinDesc mjdesc) {
+ mjdesc.setPosBigTable(0);
+ List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>();
+ expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
+ Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
+ keyMap.put((byte)0, expr);
+ mjdesc.setKeys(keyMap);
+ mjdesc.setExprs(keyMap);

+ //Set filter expression
+ GenericUDFOPEqual udf = new GenericUDFOPEqual();
+ ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc();
+ equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ equalExprDesc.setGenericUDF(udf);
+ List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+ children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
+ children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false));
+ equalExprDesc.setChildren(children1);
+ List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>();
+ filterExpr.add(equalExprDesc);
+ Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
+ filterMap.put((byte) 0, expr);
+ mjdesc.setFilters(filterMap);
+ }
+
+ /**
+ * testValidateMapJoinOperator validates that the Map join operator can be vectorized.
+ */
    @Test
    public void testValidateMapJoinOperator() {
      MapJoinOperator mop = new MapJoinOperator();
      MapJoinDesc mjdesc = new MapJoinDesc();
- mjdesc.setPosBigTable(0);
- List<ExprNodeDesc> expr = new ArrayList<ExprNodeDesc>();
- expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
- Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
- keyMap.put((byte)0, expr);
- mjdesc.setKeys(keyMap);
- mjdesc.setExprs(keyMap);
-
- //Set filter expression
- GenericUDFOPEqual udf = new GenericUDFOPEqual();
- ExprNodeGenericFuncDesc equalExprDesc = new ExprNodeGenericFuncDesc();
- equalExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
- equalExprDesc.setGenericUDF(udf);
- List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
- children1.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
- children1.add(new ExprNodeColumnDesc(Integer.class, "col3", "T2", false));
- equalExprDesc.setChildren(children1);
- List<ExprNodeDesc> filterExpr = new ArrayList<ExprNodeDesc>();
- filterExpr.add(equalExprDesc);
- Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
- filterMap.put((byte) 0, expr);
- mjdesc.setFilters(filterMap);
+
+ prepareAbstractMapJoin(mop, mjdesc);
      mop.setConf(mjdesc);
-
+
      Vectorizer vectorizer = new Vectorizer();
-
      Assert.assertTrue(vectorizer.validateOperator(mop));
- SMBMapJoinOperator smbmop = new SMBMapJoinOperator(mop);
- Assert.assertFalse(vectorizer.validateOperator(smbmop));
+ }
+
+
+ /**
+ * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized.
+ */
+ @Test
+ public void testValidateSMBJoinOperator() {
+ SMBMapJoinOperator mop = new SMBMapJoinOperator();
+ SMBJoinDesc mjdesc = new SMBJoinDesc();
+
+ prepareAbstractMapJoin(mop, mjdesc);
+ mop.setConf(mjdesc);
+
+ Vectorizer vectorizer = new Vectorizer();
+ Assert.assertTrue(vectorizer.validateOperator(mop));
    }
  }

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q Fri Jan 17 02:08:46 2014
@@ -0,0 +1,46 @@
+create table vsmb_bucket_1(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC;
+create table vsmb_bucket_2(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC;
+
+create table vsmb_bucket_RC(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS RCFILE;
+
+create table vsmb_bucket_TXT(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS TEXTFILE;
+
+insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2;
+insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2;
+
+set hive.vectorized.execution.enabled=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join.noconditionaltask = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key;
+
+-- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out?rev=1558987&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_bucketmapjoin1.q.out Fri Jan 17 02:08:46 2014
@@ -0,0 +1,370 @@
+PREHOOK: query: create table vsmb_bucket_1(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_1(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_1
+PREHOOK: query: create table vsmb_bucket_2(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_2(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_2
+PREHOOK: query: create table vsmb_bucket_RC(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_RC(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_RC
+PREHOOK: query: create table vsmb_bucket_TXT(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table vsmb_bucket_TXT(key int, value string)
+ CLUSTERED BY (key)
+ SORTED BY (key) INTO 1 BUCKETS
+ STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@vsmb_bucket_TXT
+PREHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: insert into table vsmb_bucket_1 select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: insert into table vsmb_bucket_2 select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_rc
+POSTHOOK: query: insert into table vsmb_bucket_RC select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_rc
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@vsmb_bucket_txt
+POSTHOOK: query: insert into table vsmb_bucket_TXT select cint, cstring1 from alltypesorc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@vsmb_bucket_txt
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+PREHOOK: query: explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Position of Big Table: 1
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+PREHOOK: query: explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_RC) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Position of Big Table: 0
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_rc
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_rc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
+-- explain
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
+
+explain
+select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME vsmb_bucket_1) a) (TOK_TABREF (TOK_TABNAME vsmb_bucket_TXT) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Position of Big Table: 0
+ Vectorized execution: true
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Vectorized execution: true
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Vectorized execution: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_txt
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_txt
+#### A masked pattern was here ####
+POSTHOOK: Lineage: vsmb_bucket_1.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_1.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_2.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_rc.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
+528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 1 of 1 | next ›
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jan 17, '14 at 2:09a
active: Jan 17, '14 at 2:09a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Ehans: 1 post

People

Translate

site design / logo © 2021 Grokbase