FAQ
Author: namit
Date: Tue Oct 2 07:15:37 2012
New Revision: 1392761

URL: http://svn.apache.org/viewvc?rev=1392761&view=rev
Log:
HIVE-3495 For UDAFs, when generating a plan without map-side-aggregation, constant
agg parameters will be replaced by ExprNodeColumnDesc (Yin Huai via namit)



Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q
hive/trunk/ql/src/test/results/clientpositive/count.q.out
hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out
hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out
hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue Oct 2 07:15:37 2012
@@ -165,6 +165,7 @@ import org.apache.hadoop.hive.serde2.Des
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -2480,6 +2481,50 @@ public class SemanticAnalyzer extends Ba
}

/**
+ * Check if the given internalName represents a constant parameter in aggregation parameters
+ * of an aggregation tree.
+ * This method is only invoked when map-side aggregation is not involved. In this case,
+ * every parameter in every aggregation tree should already have a corresponding ColumnInfo,
+ * which is generated when the corresponding ReduceSinkOperator of the GroupByOperator being
+ * generated is created. If we find that this parameter is a constant parameter,
+ * we will return the corresponding ExprNodeDesc in reduceValues, and we will not need to
+ * use a new ExprNodeColumnDesc, which cannot be treated as a constant parameter, for this
+ * parameter (since the writableObjectInspector of an ExprNodeColumnDesc will not be
+ * an instance of ConstantObjectInspector).
+ *
+ * @param reduceValues
+ * value columns of the corresponding ReduceSinkOperator
+ * @param internalName
+ * the internal name of this parameter
+ * @return the ExprNodeDesc of the constant parameter if the given internalName represents
+ * a constant parameter; otherwise, return null
+ */
+ private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
+ List<ExprNodeDesc> reduceValues) {
+ // only the pattern of "VALUE._col([0-9]+)" should be handled.
+
+ String[] terms = internalName.split("\\.");
+ if (terms.length != 2 || reduceValues == null) {
+ return null;
+ }
+
+ if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
+ int pos = getPositionFromInternalName(terms[1]);
+ if (pos >= 0 && pos < reduceValues.size()) {
+ ExprNodeDesc reduceValue = reduceValues.get(pos);
+ if (reduceValue != null) {
+ if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
+ // this internalName represents a constant parameter in aggregation parameters
+ return reduceValue;
+ }
+ }
+ }
+ }
+
+ return null;
+ }
+
+ /**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
@@ -2528,12 +2573,14 @@ public class SemanticAnalyzer extends Ba
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
+ List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
}
+ reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
@@ -2565,9 +2612,19 @@ public class SemanticAnalyzer extends Ba
getColumnInternalName(i-1);

}
- aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
+
+ ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
- paraExprInfo.getIsVirtualCol()));
+ paraExprInfo.getIsVirtualCol());
+ ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
+ paraExprInfo.getInternalName(), reduceValues);
+
+ if (reduceValue != null) {
+ // this parameter is a constant
+ expr = reduceValue;
+ }
+
+ aggParameters.add(expr);
}

if (isDistinct) {
@@ -2653,12 +2710,14 @@ public class SemanticAnalyzer extends Ba
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
+ List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1);
}
+ reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
@@ -2699,9 +2758,20 @@ public class SemanticAnalyzer extends Ba
+ getColumnInternalName(i-1);

}
- aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
+
+ ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
- paraExprInfo.getIsVirtualCol()));
+ paraExprInfo.getIsVirtualCol());
+ ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
+ paraExprInfo.getInternalName(), reduceValues);
+
+ if (reduceValue != null) {
+ // this parameter is a constant
+ expr = reduceValue;
+ }
+
+ aggParameters.add(expr);
+
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);

Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx.q Tue Oct 2 07:15:37 2012
@@ -1,6 +1,26 @@

set mapred.reduce.tasks=4;
set hive.exec.reducers.max=4;
+set hive.map.aggr=false;
+-- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src;
+
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src;
+SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src;
+
+set hive.map.aggr=true;
+-- enable map-side aggregation
SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src;
SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src;
SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src;

Modified: hive/trunk/ql/src/test/results/clientpositive/count.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/count.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/count.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/count.q.out Tue Oct 2 07:15:37 2012
@@ -486,7 +486,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
expr: count()
expr: count(KEY._col0:14._col0)
expr: count(KEY._col0:14._col1)

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup.q.out Tue Oct 2 07:15:37 2012
@@ -176,7 +176,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
mode: partial1
outputColumnNames: _col0
@@ -264,7 +264,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
mode: complete
outputColumnNames: _col0

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup2.q.out Tue Oct 2 07:15:37 2012
@@ -251,7 +251,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
keys:
expr: KEY._col0
@@ -362,7 +362,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
keys:
expr: KEY._col0

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup4.q.out Tue Oct 2 07:15:37 2012
@@ -246,7 +246,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
expr: count(DISTINCT KEY._col0:0._col0)
bucketGroup: false
mode: partial1
@@ -347,7 +347,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
expr: count(DISTINCT KEY._col0:0._col0)
bucketGroup: false
mode: complete

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out Tue Oct 2 07:15:37 2012
@@ -137,7 +137,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
expr: count(DISTINCT KEY._col0:0._col0)
expr: count(DISTINCT KEY._col0:1._col0)
bucketGroup: false

Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out?rev=1392761&r1=1392760&r2=1392761&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx.q.out Tue Oct 2 07:15:37 2012
@@ -1,8 +1,120 @@
-PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+PREHOOK: query: -- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
-POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+POSTHOOK: query: -- disable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+252.77777777777777
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+252.77777777777777
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+255.5
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[24.07,252.77777777777777,476.9444444444444,487.82]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[24.07,252.77777777777777,476.9444444444444,487.82]
+PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[26.0,255.5,479.0,491.0]
+PREHOOK: query: -- enable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- enable map-side aggregation
+SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Oct 2, '12 at 7:16a
active: Oct 2, '12 at 7:16a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Namit: 1 post

People

Translate

site design / logo © 2022 Grokbase