Repository: hive
Updated Branches:
   refs/heads/master 44ab45534 -> ff10f0337


HIVE-10729: Query failed when select complex columns from joinned table (tez map join only) (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff10f033
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff10f033
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff10f033

Branch: refs/heads/master
Commit: ff10f03371f5ff54d34a28938c5d6e69940113ea
Parents: 44ab455
Author: Matt McCline <mmccline@hortonworks.com>
Authored: Tue Mar 29 01:52:48 2016 -0700
Committer: Matt McCline <mmccline@hortonworks.com>
Committed: Tue Mar 29 01:54:51 2016 -0700

----------------------------------------------------------------------
  .../test/resources/testconfiguration.properties | 1 +
  .../ql/exec/vector/VectorizationContext.java | 14 +-
  .../mapjoin/VectorMapJoinCommonOperator.java | 2 +-
  .../hive/ql/optimizer/physical/Vectorizer.java | 7 +
  .../ql/optimizer/physical/TestVectorizer.java | 5 +
  .../clientpositive/vector_complex_join.q | 29 +++
  .../tez/vector_complex_join.q.out | 227 +++++++++++++++++++
  .../clientpositive/vector_complex_join.q.out | 225 ++++++++++++++++++
  8 files changed, 502 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0672e0e..ed26dea 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -258,6 +258,7 @@ minitez.query.files.shared=acid_globallimit.q,\
    vector_coalesce.q,\
    vector_coalesce_2.q,\
    vector_complex_all.q,\
+ vector_complex_join.q,\
    vector_count_distinct.q,\
    vector_data_types.q,\
    vector_date_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 0552f9d..1eb960d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -342,7 +342,7 @@ public class VectorizationContext {

      private final Set<Integer> usedOutputColumns = new HashSet<Integer>();

- int allocateOutputColumn(String hiveTypeName) {
+ int allocateOutputColumn(String hiveTypeName) throws HiveException {
          if (initialOutputCol < 0) {
            // This is a test
            return 0;
@@ -403,7 +403,7 @@ public class VectorizationContext {
      }
    }

- public int allocateScratchColumn(String hiveTypeName) {
+ public int allocateScratchColumn(String hiveTypeName) throws HiveException {
      return ocm.allocateOutputColumn(hiveTypeName);
    }

@@ -2243,7 +2243,7 @@ public class VectorizationContext {
      }
    }

- static String getNormalizedName(String hiveTypeName) {
+ static String getNormalizedName(String hiveTypeName) throws HiveException {
      VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
      switch (argType) {
      case INT_FAMILY:
@@ -2269,11 +2269,11 @@ public class VectorizationContext {
      case INTERVAL_DAY_TIME:
        return hiveTypeName;
      default:
- return "None";
+ throw new HiveException("Unexpected hive type name " + hiveTypeName);
      }
    }

- static String getUndecoratedName(String hiveTypeName) {
+ static String getUndecoratedName(String hiveTypeName) throws HiveException {
      VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
      switch (argType) {
      case INT_FAMILY:
@@ -2296,7 +2296,7 @@ public class VectorizationContext {
      case INTERVAL_DAY_TIME:
        return hiveTypeName;
      default:
- return "None";
+ throw new HiveException("Unexpected hive type name " + hiveTypeName);
      }
    }

@@ -2511,7 +2511,7 @@ public class VectorizationContext {
      }
      sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", ");

- sb.append("scratchColumnTypeNames ").append(getScratchColumnTypeNames().toString());
+ sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames()));

      return sb.toString();
    }
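
The key behavioral change in the VectorizationContext hunks above is that the scratch-column type-name helpers now fail fast with a HiveException instead of returning the placeholder "None" when they meet a type they cannot vectorize (for example the map<int,string> column used by the failing query). Below is a minimal, self-contained sketch of that fail-fast contract; it uses plain Java types and a local stand-in exception rather than Hive's VectorExpressionDescriptor, so the concrete return strings are illustrative only.

    // Sketch only: unknown/complex Hive type names raise an exception instead of mapping to "None".
    public class TypeNameSketch {

      // Local stand-in for org.apache.hadoop.hive.ql.metadata.HiveException.
      static class HiveException extends Exception {
        HiveException(String msg) { super(msg); }
      }

      static String getNormalizedName(String hiveTypeName) throws HiveException {
        switch (hiveTypeName) {
          case "tinyint":
          case "smallint":
          case "int":
          case "bigint":
            return "bigint";          // illustrative: the INT_FAMILY collapses to one scratch type
          case "float":
          case "double":
            return "double";          // illustrative: FLOAT_FAMILY
          case "string":
          case "timestamp":
            return hiveTypeName;      // kept as-is
          default:
            // e.g. "map<int,string>" or "array<int>": there is no vectorized scratch column type,
            // so callers such as allocateScratchColumn must now handle the failure explicitly.
            throw new HiveException("Unexpected hive type name " + hiveTypeName);
        }
      }

      public static void main(String[] args) throws Exception {
        System.out.println(getNormalizedName("int"));           // bigint
        try {
          getNormalizedName("map<int,string>");
        } catch (HiveException e) {
          System.out.println(e.getMessage());                   // Unexpected hive type name map<int,string>
        }
      }
    }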

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index e26e31b..8ad7ca4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -255,7 +255,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
      determineCommonInfo(isOuterJoin);
    }

- protected void determineCommonInfo(boolean isOuter) {
+ protected void determineCommonInfo(boolean isOuter) throws HiveException {

      bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");


http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index f674ece..d806b97 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1362,6 +1362,13 @@ public class Vectorizer implements PhysicalPlanResolver {
        LOG.info("Cannot vectorize map work value expression");
        return false;
      }
+    Byte[] order = desc.getTagOrder();
+    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
+    if (!validateExprNodeDesc(smallTableExprs)) {
+      LOG.info("Cannot vectorize map work small table expression");
+      return false;
+    }
      return true;
    }
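
The Vectorizer hunk above is the core of the fix: before converting a Tez map join to its vectorized form, the small-table (hash-table side) expressions are now validated as well, so a join that projects a complex column such as map<int,string> from the small table falls back to the row-mode operator instead of failing at runtime. What follows is a minimal, self-contained sketch of that selection-and-check logic; MapJoinInfo and validateExprs are hypothetical stand-ins for MapJoinDesc and Vectorizer.validateExprNodeDesc, not the actual Hive classes.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Sketch of the new small-table check in validateMapJoinDesc (stand-in types, not Hive classes).
    class MapJoinInfo {
      Byte[] tagOrder;                      // join positions in tag order, e.g. {0, 1}
      Map<Byte, List<String>> exprs;        // per-position expression types (strings for brevity)
    }

    public class SmallTableCheckSketch {

      // Stand-in for Vectorizer.validateExprNodeDesc: reject complex-typed expressions.
      static boolean validateExprs(List<String> exprTypes) {
        return exprTypes.stream().noneMatch(t -> t.startsWith("map<") || t.startsWith("array<"));
      }

      static boolean canVectorize(MapJoinInfo desc, byte posBigTable) {
        Byte[] order = desc.tagOrder;
        // The single small table is whichever position is not the big table.
        Byte posSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
        List<String> smallTableExprs = desc.exprs.get(posSmallTable);
        if (!validateExprs(smallTableExprs)) {
          System.out.println("Cannot vectorize map work small table expression");
          return false;
        }
        return true;
      }

      public static void main(String[] args) {
        MapJoinInfo desc = new MapJoinInfo();
        desc.tagOrder = new Byte[] {(byte) 0, (byte) 1};
        desc.exprs = new HashMap<>();
        desc.exprs.put((byte) 0, Arrays.asList("int"));
        desc.exprs.put((byte) 1, Arrays.asList("int", "map<int,string>"));  // complex small-table column
        System.out.println(canVectorize(desc, (byte) 0));                   // false: falls back to row mode
      }
    }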


http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
index 5628959..9d4ca76 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
@@ -158,8 +158,13 @@ public class TestVectorizer {
        expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
        Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
        keyMap.put((byte)0, expr);
+    List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
+    smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
+    keyMap.put((byte)1, smallTableExpr);
        mjdesc.setKeys(keyMap);
        mjdesc.setExprs(keyMap);
+    Byte[] order = new Byte[] {(byte) 0, (byte) 1};
+    mjdesc.setTagOrder(order);

        //Set filter expression
        GenericUDFOPEqual udf = new GenericUDFOPEqual();

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/queries/clientpositive/vector_complex_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q
new file mode 100644
index 0000000..30f38b1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_complex_join.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
+
+-- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
+INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;
+
+explain
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+
+
+CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC;
+INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1;
+
+CREATE TABLE test2b (a INT) STORED AS ORC;
+INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);
+
+explain
+select * from test2b join test2a on test2b.a = test2a.a[1];
+
+select * from test2b join test2a on test2b.a = test2a.a[1];
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
new file mode 100644
index 0000000..dc988ef
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
@@ -0,0 +1,227 @@
+PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.a SIMPLE []
+POSTHOOK: Lineage: test.b EXPRESSION []
+c0 c1
+PREHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: test
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int), b (type: map<int,string>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: map<int,string>)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b
+-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"}
+PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2a
+POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2a
+PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test2a
+POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test2a
+POSTHOOK: Lineage: test2a.a EXPRESSION []
+c0
+PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2b
+POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2b
+PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test2b
+POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test2b
+POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+_col0
+PREHOOK: query: explain
+select * from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test2b
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a[1] (type: int)
+ outputColumnNames: _col0, _col4
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col4 (type: array<int>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: test2a
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a[1] is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: a[1] (type: int)
+ sort order: +
+ Map-reduce partition columns: a[1] (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: a (type: array<int>)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2a
+PREHOOK: Input: default@test2b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2a
+POSTHOOK: Input: default@test2b
+#### A masked pattern was here ####
+test2b.a test2a.a
+2 [1,2]

http://git-wip-us.apache.org/repos/asf/hive/blob/ff10f033/ql/src/test/results/clientpositive/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
new file mode 100644
index 0000000..002cdeb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out
@@ -0,0 +1,225 @@
+PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.a SIMPLE []
+POSTHOOK: Lineage: test.b EXPRESSION []
+c0 c1
+PREHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:test
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:test
+ TableScan
+ alias: test
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int), b (type: map<int,string>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b
+-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"}
+PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2a
+POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2a
+PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test2a
+POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test2a
+POSTHOOK: Lineage: test2a.a EXPRESSION []
+c0
+PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2b
+POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2b
+PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test2b
+POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test2b
+POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+_col0
+PREHOOK: query: explain
+select * from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ test2b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ test2b
+ TableScan
+ alias: test2b
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 a (type: int)
+ 1 a[1] (type: int)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test2a
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a[1] is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a[1] (type: int)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col4 (type: array<int>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2a
+PREHOOK: Input: default@test2b
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2a
+POSTHOOK: Input: default@test2b
+#### A masked pattern was here ####
+test2b.a test2a.a
+2 [1,2]


  • Mmccline at Mar 29, 2016 at 9:17 am
    Repository: hive
    Updated Branches:
       refs/heads/branch-1 3a39aba34 -> 1934214ae


    HIVE-10729: Query failed when select complex columns from joinned table (tez map join only) (Matt McCline, reviewed by Sergey Shelukhin)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1934214a
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1934214a
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1934214a

    Branch: refs/heads/branch-1
    Commit: 1934214ae97f3013ae0a0a5ed42fb32e82156634
    Parents: 3a39aba
    Author: Matt McCline <mmccline@hortonworks.com>
    Authored: Tue Mar 29 02:17:36 2016 -0700
    Committer: Matt McCline <mmccline@hortonworks.com>
    Committed: Tue Mar 29 02:17:36 2016 -0700

    ----------------------------------------------------------------------
      .../test/resources/testconfiguration.properties | 1 +
      .../ql/exec/vector/VectorizationContext.java | 14 +-
      .../mapjoin/VectorMapJoinCommonOperator.java | 2 +-
      .../hive/ql/optimizer/physical/Vectorizer.java | 7 +
      .../ql/optimizer/physical/TestVectorizer.java | 5 +
      .../clientpositive/vector_complex_join.q | 30 +++
      .../tez/vector_complex_join.q.out | 224 +++++++++++++++++++
      .../clientpositive/vector_complex_join.q.out | 224 +++++++++++++++++++
      8 files changed, 499 insertions(+), 8 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/itests/src/test/resources/testconfiguration.properties
    ----------------------------------------------------------------------
    diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
    index e34560f..046d556 100644
    --- a/itests/src/test/resources/testconfiguration.properties
    +++ b/itests/src/test/resources/testconfiguration.properties
    @@ -231,6 +231,7 @@ minitez.query.files.shared=acid_globallimit.q,\
        vector_coalesce_2.q,\
        vector_complex.q,\
        vector_complex_all.q,\
    + vector_complex_join.q,\
        vector_count_distinct.q,\
        vector_data_types.q,\
        vector_date_1.q,\

    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    index 48a6212..43c7f3d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    @@ -334,7 +334,7 @@ public class VectorizationContext {

          private final Set<Integer> usedOutputColumns = new HashSet<Integer>();

    - int allocateOutputColumn(String hiveTypeName) {
    + int allocateOutputColumn(String hiveTypeName) throws HiveException {
              if (initialOutputCol < 0) {
                // This is a test
                return 0;
    @@ -395,7 +395,7 @@ public class VectorizationContext {
          }
        }

    - public int allocateScratchColumn(String hiveTypeName) {
    + public int allocateScratchColumn(String hiveTypeName) throws HiveException {
          return ocm.allocateOutputColumn(hiveTypeName);
        }

    @@ -2253,7 +2253,7 @@ public class VectorizationContext {
          }
        }

    - static String getNormalizedName(String hiveTypeName) {
    + static String getNormalizedName(String hiveTypeName) throws HiveException {
          VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
          switch (argType) {
          case INT_FAMILY:
    @@ -2279,11 +2279,11 @@ public class VectorizationContext {
          case INTERVAL_DAY_TIME:
            return hiveTypeName;
          default:
    - return "None";
    + throw new HiveException("Unexpected hive type name " + hiveTypeName);
          }
        }

    - static String getUndecoratedName(String hiveTypeName) {
    + static String getUndecoratedName(String hiveTypeName) throws HiveException {
          VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
          switch (argType) {
          case INT_FAMILY:
    @@ -2306,7 +2306,7 @@ public class VectorizationContext {
          case INTERVAL_DAY_TIME:
            return hiveTypeName;
          default:
    - return "None";
    + throw new HiveException("Unexpected hive type name " + hiveTypeName);
          }
        }

    @@ -2505,7 +2505,7 @@ public class VectorizationContext {
          }
          sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", ");

    - sb.append("scratchColumnTypeNames ").append(getScratchColumnTypeNames().toString());
    + sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames()));

          return sb.toString();
        }

    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    index f28d3ab..45b52c4 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
    @@ -253,7 +253,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
          determineCommonInfo(isOuterJoin);
        }

    - protected void determineCommonInfo(boolean isOuter) {
    + protected void determineCommonInfo(boolean isOuter) throws HiveException {

          bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");


    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    index 758256b..2b7335a 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    @@ -1318,6 +1318,13 @@ public class Vectorizer implements PhysicalPlanResolver {
            LOG.info("Cannot vectorize map work value expression");
            return false;
          }
    +    Byte[] order = desc.getTagOrder();
    +    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    +    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    +    if (!validateExprNodeDesc(smallTableExprs)) {
    +      LOG.info("Cannot vectorize map work small table expression");
    +      return false;
    +    }
          return true;
        }


    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    index f9a0e79..fa802d6 100644
    --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    @@ -157,8 +157,13 @@ public class TestVectorizer {
            expr.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
            Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
            keyMap.put((byte)0, expr);
    +    List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
    +    smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
    +    keyMap.put((byte)1, smallTableExpr);
            mjdesc.setKeys(keyMap);
            mjdesc.setExprs(keyMap);
    +    Byte[] order = new Byte[] {(byte) 0, (byte) 1};
    +    mjdesc.setTagOrder(order);

            //Set filter expression
            GenericUDFOPEqual udf = new GenericUDFOPEqual();

    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/test/queries/clientpositive/vector_complex_join.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q
    new file mode 100644
    index 0000000..6063ca7
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/vector_complex_join.q
    @@ -0,0 +1,30 @@
    +set hive.cli.print.header=true;
    +set hive.explain.user=false;
    +set hive.fetch.task.conversion=none;
    +SET hive.vectorized.execution.enabled=true;
    +SET hive.auto.convert.join=true;
    +set hive.fetch.task.conversion=none;
    +set hive.cbo.enable=false;
    +
    +-- From HIVE-10729. Not expected to vectorize this query.
    +--
    +CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
    +INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;
    +
    +explain
    +select * from alltypesorc join test where alltypesorc.cint=test.a;
    +
    +select * from alltypesorc join test where alltypesorc.cint=test.a;
    +
    +
    +
    +CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC;
    +INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1;
    +
    +CREATE TABLE test2b (a INT) STORED AS ORC;
    +INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);
    +
    +explain
    +select * from test2b join test2a on test2b.a = test2a.a[1];
    +
    +select * from test2b join test2a on test2b.a = test2a.a[1];
    \ No newline at end of file

    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
    new file mode 100644
    index 0000000..f84fbbc
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
    @@ -0,0 +1,224 @@
    +PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
    +--
    +CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test
    +POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
    +--
    +CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test
    +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@test
    +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@test
    +POSTHOOK: Lineage: test.a SIMPLE []
    +POSTHOOK: Lineage: test.b EXPRESSION []
    +_c0 _c1
    +PREHOOK: query: explain
    +select * from alltypesorc join test where alltypesorc.cint=test.a
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select * from alltypesorc join test where alltypesorc.cint=test.a
    +POSTHOOK: type: QUERY
    +Explain
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 2 (BROADCAST_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: alltypesorc
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 cint (type: int)
    + 1 a (type: int)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col15, _col16
    + input vertices:
    + 1 Map 2
    + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Filter Operator
    + predicate: (_col2 = _col15) (type: boolean)
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col15 (type: int), _col16 (type: map<int,string>)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: test
    + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: a (type: int)
    + sort order: +
    + Map-reduce partition columns: a (type: int)
    + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
    + value expressions: b (type: map<int,string>)
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +PREHOOK: Input: default@test
    +#### A masked pattern was here ####
    +POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +POSTHOOK: Input: default@test
    +#### A masked pattern was here ####
    +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b
    +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"}
    +PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test2a
    +POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test2a
    +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@test2a
    +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@test2a
    +POSTHOOK: Lineage: test2a.a EXPRESSION []
    +_c0
    +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test2b
    +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test2b
    +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__1
    +PREHOOK: Output: default@test2b
    +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__1
    +POSTHOOK: Output: default@test2b
    +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +_col0
    +PREHOOK: query: explain
    +select * from test2b join test2a on test2b.a = test2a.a[1]
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select * from test2b join test2a on test2b.a = test2a.a[1]
    +POSTHOOK: type: QUERY
    +Explain
    +STAGE DEPENDENCIES:
    + Stage-1 is a root stage
    + Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    + Stage: Stage-1
    + Tez
    + Edges:
    + Map 1 <- Map 2 (BROADCAST_EDGE)
    +#### A masked pattern was here ####
    + Vertices:
    + Map 1
    + Map Operator Tree:
    + TableScan
    + alias: test2b
    + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a is not null (type: boolean)
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 a (type: int)
    + 1 a[1] (type: int)
    + outputColumnNames: _col0, _col4
    + input vertices:
    + 1 Map 2
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + HybridGraceHashJoin: true
    + Select Operator
    + expressions: _col0 (type: int), _col4 (type: array<int>)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Map 2
    + Map Operator Tree:
    + TableScan
    + alias: test2a
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a[1] is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Reduce Output Operator
    + key expressions: a[1] (type: int)
    + sort order: +
    + Map-reduce partition columns: a[1] (type: int)
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + value expressions: a (type: array<int>)
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@test2a
    +PREHOOK: Input: default@test2b
    +#### A masked pattern was here ####
    +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@test2a
    +POSTHOOK: Input: default@test2b
    +#### A masked pattern was here ####
    +test2b.a test2a.a
    +2 [1,2]

    http://git-wip-us.apache.org/repos/asf/hive/blob/1934214a/ql/src/test/results/clientpositive/vector_complex_join.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
    new file mode 100644
    index 0000000..2e6f32e
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out
    @@ -0,0 +1,224 @@
    +PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
    +--
    +CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test
    +POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
    +--
    +CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test
    +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@test
    +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@test
    +POSTHOOK: Lineage: test.a SIMPLE []
    +POSTHOOK: Lineage: test.b EXPRESSION []
    +_c0 _c1
    +PREHOOK: query: explain
    +select * from alltypesorc join test where alltypesorc.cint=test.a
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select * from alltypesorc join test where alltypesorc.cint=test.a
    +POSTHOOK: type: QUERY
    +Explain
    +STAGE DEPENDENCIES:
    + Stage-4 is a root stage
    + Stage-3 depends on stages: Stage-4
    + Stage-0 depends on stages: Stage-3
    +
    +STAGE PLANS:
    + Stage: Stage-4
    + Map Reduce Local Work
    + Alias -> Map Local Tables:
    + test
    + Fetch Operator
    + limit: -1
    + Alias -> Map Local Operator Tree:
    + test
    + TableScan
    + alias: test
    + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE
    + HashTable Sink Operator
    + keys:
    + 0 cint (type: int)
    + 1 a (type: int)
    +
    + Stage: Stage-3
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: alltypesorc
    + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: cint is not null (type: boolean)
    + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 cint (type: int)
    + 1 a (type: int)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col15, _col16
    + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: (_col2 = _col15) (type: boolean)
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col15 (type: int), _col16 (type: map<int,string>)
    + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@alltypesorc
    +PREHOOK: Input: default@test
    +#### A masked pattern was here ####
    +POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@alltypesorc
    +POSTHOOK: Input: default@test
    +#### A masked pattern was here ####
    +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b
    +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"}
    +PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test2a
    +POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test2a
    +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@src
    +PREHOOK: Output: default@test2a
    +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@src
    +POSTHOOK: Output: default@test2a
    +POSTHOOK: Lineage: test2a.a EXPRESSION []
    +_c0
    +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@test2b
    +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@test2b
    +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__1
    +PREHOOK: Output: default@test2b
    +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__1
    +POSTHOOK: Output: default@test2b
    +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +_col0
    +PREHOOK: query: explain
    +select * from test2b join test2a on test2b.a = test2a.a[1]
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select * from test2b join test2a on test2b.a = test2a.a[1]
    +POSTHOOK: type: QUERY
    +Explain
    +STAGE DEPENDENCIES:
    + Stage-4 is a root stage
    + Stage-3 depends on stages: Stage-4
    + Stage-0 depends on stages: Stage-3
    +
    +STAGE PLANS:
    + Stage: Stage-4
    + Map Reduce Local Work
    + Alias -> Map Local Tables:
    + test2b
    + Fetch Operator
    + limit: -1
    + Alias -> Map Local Operator Tree:
    + test2b
    + TableScan
    + alias: test2b
    + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a is not null (type: boolean)
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + HashTable Sink Operator
    + keys:
    + 0 a (type: int)
    + 1 a[1] (type: int)
    +
    + Stage: Stage-3
    + Map Reduce
    + Map Operator Tree:
    + TableScan
    + alias: test2a
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Filter Operator
    + predicate: a[1] is not null (type: boolean)
    + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Map Join Operator
    + condition map:
    + Inner Join 0 to 1
    + keys:
    + 0 a (type: int)
    + 1 a[1] (type: int)
    + outputColumnNames: _col0, _col4
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + Select Operator
    + expressions: _col0 (type: int), _col4 (type: array<int>)
    + outputColumnNames: _col0, _col1
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + File Output Operator
    + compressed: false
    + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
    + table:
    + input format: org.apache.hadoop.mapred.TextInputFormat
    + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    + Local Work:
    + Map Reduce Local Work
    +
    + Stage: Stage-0
    + Fetch Operator
    + limit: -1
    + Processor Tree:
    + ListSink
    +
    +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@test2a
    +PREHOOK: Input: default@test2b
    +#### A masked pattern was here ####
    +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1]
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@test2a
    +POSTHOOK: Input: default@test2b
    +#### A masked pattern was here ####
    +test2b.a test2a.a
    +2 [1,2]

Discussion Overview
group: commits@hive.apache.org
categories: hive, hadoop
posted: Mar 29, '16 at 8:56a
active: Mar 29, '16 at 9:17a
posts: 2
users: 1
website: hive.apache.org
