Repository: hive
Updated Branches:
   refs/heads/master 35e91f267 -> c6a09ce30


HIVE-10698 : query on view results fails with table not found error if view is created with subquery alias (CTE). (Pengcheng Xiong via Ashutosh Chauhan, John Pullokkaran)

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6a09ce3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6a09ce3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6a09ce3

Branch: refs/heads/master
Commit: c6a09ce30c0e9a1a6cc9eb3be37c1363997823dd
Parents: 66acd26
Author: Pengcheng Xiong <pxiong@hortonworks.com>
Authored: Thu May 14 20:42:00 2015 -0700
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Sun May 31 00:49:46 2015 -0700

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/parse/HiveParser.g | 4 +-
  .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 66 ++---
  ql/src/test/queries/clientpositive/cteViews.q | 41 ++++
  .../test/results/clientpositive/cteViews.q.out | 242 +++++++++++++++++++
  4 files changed, 309 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c6a09ce3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 4ec1925..b267bd2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2127,7 +2127,7 @@ queryStatementExpression[boolean topLevel]
      (w=withClause {topLevel}?)?
      queryStatementExpressionBody[topLevel] {
        if ($w.tree != null) {
-        adaptor.addChild($queryStatementExpressionBody.tree, $w.tree);
+        $queryStatementExpressionBody.tree.insertChild(0, $w.tree);
        }
      }
      -> queryStatementExpressionBody
@@ -2302,7 +2302,7 @@ selectStatementWithCTE
      (w=withClause)?
      selectStatement[true] {
        if ($w.tree != null) {
-        adaptor.addChild($selectStatement.tree, $w.tree);
+        $selectStatement.tree.insertChild(0, $w.tree);
        }
      }
      -> selectStatement
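
A note on why the fix uses insertChild(0, ...) rather than addChild: making the WITH clause the first child of the query AST means downstream phases see the CTE definitions before the table references that use them, instead of after. A minimal sketch of the difference, assuming ANTLR 3's CommonTree API (which the generated HiveParser trees are built on); the token types and names here are made up for illustration:

    import org.antlr.runtime.CommonToken;
    import org.antlr.runtime.tree.CommonTree;

    // addChild() appends the WITH clause after the query body's existing
    // children; insertChild(0, ...) makes it the first child instead.
    public class WithClauseOrdering {
      public static void main(String[] args) {
        CommonTree body = new CommonTree(new CommonToken(1, "QUERY_BODY"));
        body.addChild(new CommonTree(new CommonToken(2, "TOK_FROM")));
        CommonTree with = new CommonTree(new CommonToken(3, "TOK_CTE"));

        body.insertChild(0, with);               // patched ordering: WITH first
        System.out.println(body.toStringTree()); // (QUERY_BODY TOK_CTE TOK_FROM)
      }
    }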

http://git-wip-us.apache.org/repos/asf/hive/blob/c6a09ce3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 351c267..d609732 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -709,10 +709,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {

      qb.getParseInfo().setSrcForAlias(alias, tableTree);

-    unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
-    if (aliasIndex != 0) {
-      unparseTranslator.addIdentifierTranslation((ASTNode) tabref
-          .getChild(aliasIndex));
+    // If aliasToCTEs contains the alias, we do not do the translation,
+    // because the CTE is actually a subquery.
+    if (!this.aliasToCTEs.containsKey(alias)) {
+      unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
+      if (aliasIndex != 0) {
+        unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
+      }
      }

      return alias;
@@ -952,19 +955,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     *
     */
    private ASTNode findCTEFromName(QB qb, String cteName) {
-
-    /*
-     * When saving a view definition all table references in the AST are qualified; including CTE references.
-     * Where as CTE definitions have no DB qualifier; so we strip out the DB qualifier before searching in
-     * <code>aliasToCTEs</code> map.
-     */
-    String currDB = SessionState.get().getCurrentDatabase();
-    if ( currDB != null && cteName.startsWith(currDB) &&
-        cteName.length() > currDB.length() &&
-        cteName.charAt(currDB.length()) == '.' ) {
-      cteName = cteName.substring(currDB.length() + 1);
-    }
-
      StringBuffer qId = new StringBuffer();
      if (qb.getId() != null) {
        qId.append(qb.getId());
@@ -997,14 +987,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
      cteAlias = cteAlias == null ? cteName : cteAlias;
      ASTNode cteQryNode = findCTEFromName(qb, cteName);
      QBExpr cteQBExpr = new QBExpr(cteAlias);
-
-    String cteText = ctx.getTokenRewriteStream().toString(
-        cteQryNode.getTokenStartIndex(), cteQryNode.getTokenStopIndex());
-    final ASTNodeOrigin cteOrigin = new ASTNodeOrigin("CTE", cteName,
-        cteText, cteAlias, cteQryNode);
-    cteQryNode = (ASTNode) ParseDriver.adaptor.dupTree(cteQryNode);
-    SubQueryUtils.setOriginDeep(cteQryNode, cteOrigin);
-
      doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
      qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
    }
@@ -1571,31 +1553,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {

        for (String alias : tabAliases) {
          String tab_name = qb.getTabNameForAlias(alias);
+
+        // We first look for this alias among the CTEs, and then in the catalog.
+        /*
+         * If this is a CTE reference: add its AST as a SubQuery to this QB.
+         */
+        ASTNode cteNode = findCTEFromName(qb, tab_name.toLowerCase());
+        if (cteNode != null) {
+          String cte_name = tab_name.toLowerCase();
+          if (ctesExpanded.contains(cte_name)) {
+            throw new SemanticException("Recursive cte " + tab_name + " detected (cycle: "
+                + StringUtils.join(ctesExpanded, " -> ") + " -> " + tab_name + ").");
+          }
+          addCTEAsSubQuery(qb, cte_name, alias);
+          sqAliasToCTEName.put(alias, cte_name);
+          continue;
+        }
+
          Table tab = db.getTable(tab_name, false);
          if (tab == null) {
-          /*
-           * if this s a CTE reference:
-           * Add its AST as a SubQuery to this QB.
-           */
-          ASTNode cteNode = findCTEFromName(qb, tab_name.toLowerCase());
-          if ( cteNode != null ) {
-            String cte_name = tab_name.toLowerCase();
-            if (ctesExpanded.contains(cte_name)) {
-              throw new SemanticException("Recursive cte " + tab_name +
-                  " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
-                  " -> " + tab_name + ").");
-            }
-            addCTEAsSubQuery(qb, cte_name, alias);
-            sqAliasToCTEName.put(alias, cte_name);
-            continue;
-          }
            ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
            if (null != src) {
              throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
            } else {
              throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
            }
-
          }

          // Disallow INSERT INTO on bucketized tables
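
The net effect of this SemanticAnalyzer change is a new resolution order: an alias is now checked against the CTE map before the metastore is consulted, rather than only after a metastore miss, and CTE references are no longer database-qualified in the view's expanded text, so replaying the view body from another database still finds the CTE. A simplified sketch of that lookup order, using hypothetical types rather than the real SemanticAnalyzer API:

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.LinkedHashSet;
    import java.util.Map;
    import java.util.Set;

    // Hypothetical resolver illustrating the lookup order only: CTEs shadow
    // catalog tables, and an already-expanded CTE name signals a recursive cycle.
    class CteFirstResolver {
      private final Map<String, String> aliasToCTEs = new HashMap<>();   // cte name -> definition
      private final Set<String> metastoreTables = new HashSet<>();
      private final Set<String> ctesExpanded = new LinkedHashSet<>();    // for cycle detection

      String resolve(String tabName) {
        String cteName = tabName.toLowerCase();
        if (aliasToCTEs.containsKey(cteName)) {      // 1. CTE map first
          if (!ctesExpanded.add(cteName)) {
            throw new IllegalStateException("Recursive cte " + tabName + " detected");
          }
          return "subquery:" + cteName;              // expand the CTE as a subquery
        }
        if (metastoreTables.contains(cteName)) {     // 2. then the catalog
          return "table:" + cteName;
        }
        throw new IllegalStateException("Table not found " + tabName);
      }
    }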

http://git-wip-us.apache.org/repos/asf/hive/blob/c6a09ce3/ql/src/test/queries/clientpositive/cteViews.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cteViews.q b/ql/src/test/queries/clientpositive/cteViews.q
new file mode 100644
index 0000000..c076841
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cteViews.q
@@ -0,0 +1,41 @@
+with src1 as (select key from src order by key limit 5)
+select * from src1;
+
+use default;
+drop view v;
+create view v as with cte as (select key, value from src order by key limit 5)
+select key from cte;
+
+describe extended v;
+
+create database bug;
+use bug;
+select * from default.v;
+drop database bug;
+
+use default;
+drop view v;
+create view v as with cte as (select * from src order by key limit 5)
+select * from cte;
+
+describe extended v;
+
+create database bug;
+use bug;
+select * from default.v;
+drop database bug;
+
+
+use default;
+drop view v;
+create view v as with src1 as (select key from src order by key limit 5)
+select * from src1;
+
+describe extended v;
+
+create database bug;
+use bug;
+select * from default.v;
+use default;
+drop view v;
+drop database bug;

http://git-wip-us.apache.org/repos/asf/hive/blob/c6a09ce3/ql/src/test/results/clientpositive/cteViews.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cteViews.q.out b/ql/src/test/results/clientpositive/cteViews.q.out
new file mode 100644
index 0000000..6291784
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cteViews.q.out
@@ -0,0 +1,242 @@
+PREHOOK: query: with src1 as (select key from src order by key limit 5)
+select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: with src1 as (select key from src order by key limit 5)
+select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+0
+0
+10
+100
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view v as with cte as (select key, value from src order by key limit 5)
+select key from cte
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as with cte as (select key, value from src order by key limit 5)
+select key from cte
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+PREHOOK: query: describe extended v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: describe extended v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+key string
+
+#### A masked pattern was here ####
+select key from cte, viewExpandedText:with cte as (select `src`.`key`, `src`.`value` from `default`.`src` order by key limit 5)
+select `cte`.`key` from cte, tableType:VIRTUAL_VIEW)
+PREHOOK: query: create database bug
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:bug
+POSTHOOK: query: create database bug
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:bug
+PREHOOK: query: use bug
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:bug
+POSTHOOK: query: use bug
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:bug
+PREHOOK: query: select * from default.v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from default.v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+0
+0
+0
+10
+100
+PREHOOK: query: drop database bug
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:bug
+PREHOOK: Output: database:bug
+POSTHOOK: query: drop database bug
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:bug
+POSTHOOK: Output: database:bug
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@v
+PREHOOK: Output: default@v
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@v
+POSTHOOK: Output: default@v
+PREHOOK: query: create view v as with cte as (select * from src order by key limit 5)
+select * from cte
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as with cte as (select * from src order by key limit 5)
+select * from cte
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+PREHOOK: query: describe extended v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: describe extended v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+key string
+value string
+
+#### A masked pattern was here ####
+select * from cte, viewExpandedText:with cte as (select `src`.`key`, `src`.`value` from `default`.`src` order by `src`.`key` limit 5)
+select `cte`.`key`, `cte`.`value` from cte, tableType:VIRTUAL_VIEW)
+PREHOOK: query: create database bug
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:bug
+POSTHOOK: query: create database bug
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:bug
+PREHOOK: query: use bug
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:bug
+POSTHOOK: query: use bug
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:bug
+PREHOOK: query: select * from default.v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from default.v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+PREHOOK: query: drop database bug
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:bug
+PREHOOK: Output: database:bug
+POSTHOOK: query: drop database bug
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:bug
+POSTHOOK: Output: database:bug
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@v
+PREHOOK: Output: default@v
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@v
+POSTHOOK: Output: default@v
+PREHOOK: query: create view v as with src1 as (select key from src order by key limit 5)
+select * from src1
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as with src1 as (select key from src order by key limit 5)
+select * from src1
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+PREHOOK: query: describe extended v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: describe extended v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+key string
+
+#### A masked pattern was here ####
+select * from src1, viewExpandedText:with src1 as (select `src`.`key` from `default`.`src` order by key limit 5)
+select `src1`.`key` from src1, tableType:VIRTUAL_VIEW)
+PREHOOK: query: create database bug
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:bug
+POSTHOOK: query: create database bug
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:bug
+PREHOOK: query: use bug
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:bug
+POSTHOOK: query: use bug
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:bug
+PREHOOK: query: select * from default.v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from default.v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+0
+0
+0
+10
+100
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:default
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:default
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@v
+PREHOOK: Output: default@v
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@v
+POSTHOOK: Output: default@v
+PREHOOK: query: drop database bug
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:bug
+PREHOOK: Output: database:bug
+POSTHOOK: query: drop database bug
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:bug
+POSTHOOK: Output: database:bug


  • Hashutosh at May 31, 2015 at 7:50 am
    HIVE-10802 : Table join query with some constant field in select fails (Aihua Xu via Ashutosh Chauhan)

    Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/66acd266
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/66acd266
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/66acd266

    Branch: refs/heads/master
    Commit: 66acd26670e417a3b7c7e0abee3e94b71571e0e3
    Parents: 35e91f2
    Author: Aihua Xu <aihuaxu@gmail.com>
    Authored: Sat May 30 15:13:00 2015 -0700
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Sun May 31 00:49:46 2015 -0700

    ----------------------------------------------------------------------
      .../optimizer/ConstantPropagateProcFactory.java | 57 +++++----
      .../clientpositive/udf_nondeterministic.q | 11 ++
      .../clientpositive/spark/udf_in_file.q.out | 2 +-
      .../results/clientpositive/udf_in_file.q.out | 2 +-
      .../clientpositive/udf_nondeterministic.q.out | 120 +++++++++++++++++++
      5 files changed, 164 insertions(+), 28 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/66acd266/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
    index e66de1a..4a4814d 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
    @@ -196,9 +196,13 @@ public final class ConstantPropagateProcFactory {
        /**
         * Fold input expression desc.
         *
    -    * If desc is a UDF and all parameters are constants, evaluate it. If desc is a column expression,
    -    * find it from propagated constants, and if there is, replace it with constant.
    -    *
    +    * This function recursively checks whether any subexpression of the given expression
    +    * can be evaluated as a constant, and replaces such subexpressions with that constant.
    +    * If the expression is a deterministic UDF and all its subexpressions are constants,
    +    * the value is computed immediately (at compile time rather than at runtime).
    +    * e.g.:
    +    * concat(year, month) => 200112 for year=2001, month=12, since concat is a deterministic UDF
    +    * unix_timestamp(time) => unix_timestamp(123) for time=123, since unix_timestamp is a nondeterministic UDF
         * @param desc folding expression
         * @param constants current propagated constant map
         * @param cppCtx
    @@ -213,12 +217,7 @@ public final class ConstantPropagateProcFactory {
          if (desc instanceof ExprNodeGenericFuncDesc) {
            ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;

    -      // The function must be deterministic, or we can't fold it.
           GenericUDF udf = funcDesc.getGenericUDF();
    -      if (!isDeterministicUdf(udf)) {
    -        LOG.debug("Function " + udf.getClass() + " undeterministic, quit folding.");
    -        return desc;
    -      }

            boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
            List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
    @@ -226,27 +225,33 @@ public final class ConstantPropagateProcFactory {
              newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
            }

    -      // If all child expressions are constants, evaluate UDF immediately
    -      ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
    -      if (constant != null) {
    -        LOG.debug("Folding expression:" + desc + " -> " + constant);
    -        return constant;
    +      // Don't evaluate a nondeterministic function, since its value can only be computed at runtime.
    +      if (!isDeterministicUdf(udf)) {
    +        LOG.debug("Function " + udf.getClass() + " is nondeterministic; not evaluating immediately.");
    +        ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
    +        return desc;
           } else {
    -
    -        // Check if the function can be short cut.
    -        ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
    -        if (shortcut != null) {
    -          LOG.debug("Folding expression:" + desc + " -> " + shortcut);
    -          return shortcut;
    +        // If all child expressions of a deterministic function are constants, evaluate the UDF immediately
    +        ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
    +        if (constant != null) {
    +          LOG.debug("Folding expression:" + desc + " -> " + constant);
    +          return constant;
    +        } else {
    +          // Check if the function can be short cut.
    +          ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
    +          if (shortcut != null) {
    +            LOG.debug("Folding expression:" + desc + " -> " + shortcut);
    +            return shortcut;
    +          }
    +          ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
             }
    -        ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
    -      }

    -      // If in some selected binary operators (=, is null, etc), one of the
    -      // expressions are
    -      // constant, add them to colToConstatns as half-deterministic columns.
    -      if (propagate) {
    -        propagate(udf, newExprs, op.getSchema(), constants);
    +        // If in some selected binary operators (=, is null, etc), one of the
    +        // expressions is
    +        // constant, add them to colToConstants as half-deterministic columns.
    +        if (propagate) {
    +          propagate(udf, newExprs, op.getSchema(), constants);
    +        }
           }

            return desc;
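
    The reordering above changes the folding contract: child expressions are now folded first in every case, and the deterministic check only gates compile-time evaluation instead of aborting the fold of the whole subtree (which previously left foldable children under a nondeterministic parent untouched). A self-contained sketch of that rule, using hypothetical expression types rather than Hive's ExprNodeDesc classes:

      import java.util.ArrayList;
      import java.util.List;
      import java.util.function.Function;

      // Hypothetical expression tree: constants, plus function calls that are
      // either deterministic or not.
      abstract class Expr {}

      class Const extends Expr {
        final Object value;
        Const(Object value) { this.value = value; }
      }

      class Call extends Expr {
        final boolean deterministic;
        final Function<List<Object>, Object> fn;
        List<Expr> children;
        Call(boolean deterministic, Function<List<Object>, Object> fn, List<Expr> children) {
          this.deterministic = deterministic;
          this.fn = fn;
          this.children = children;
        }
      }

      final class FoldSketch {
        static Expr fold(Expr e) {
          if (!(e instanceof Call)) {
            return e;                            // constants pass through unchanged
          }
          Call call = (Call) e;
          List<Expr> folded = new ArrayList<>();
          for (Expr child : call.children) {
            folded.add(fold(child));             // always fold children first
          }
          call.children = folded;
          if (!call.deterministic) {
            return call;                         // value is only known at runtime
          }
          List<Object> args = new ArrayList<>();
          for (Expr child : folded) {
            if (!(child instanceof Const)) {
              return call;                       // not all-constant: keep the call
            }
            args.add(((Const) child).value);
          }
          return new Const(call.fn.apply(args)); // deterministic, all-constant: evaluate now
        }
      }

    This mirrors the plan in the test below: concat(a.year, '-01-01 00:00:00') folds to the constant '2001-01-01 00:00:00', while the surrounding unix_timestamp(...) call, being flagged nondeterministic, is kept for runtime even though its folded argument is a constant.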

    http://git-wip-us.apache.org/repos/asf/hive/blob/66acd266/ql/src/test/queries/clientpositive/udf_nondeterministic.q
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/queries/clientpositive/udf_nondeterministic.q b/ql/src/test/queries/clientpositive/udf_nondeterministic.q
    new file mode 100644
    index 0000000..fcb5162
    --- /dev/null
    +++ b/ql/src/test/queries/clientpositive/udf_nondeterministic.q
    @@ -0,0 +1,11 @@
    +drop table if exists udf_tb1;
    +drop table if exists udf_tb2;
    +
    +create table udf_tb1 (year int, month int);
    +create table udf_tb2(month int);
    +insert into udf_tb1 values(2001, 11);
    +insert into udf_tb2 values(11);
    +
    +explain
    +select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month);
    +select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month);

    http://git-wip-us.apache.org/repos/asf/hive/blob/66acd266/ql/src/test/results/clientpositive/spark/udf_in_file.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/spark/udf_in_file.q.out b/ql/src/test/results/clientpositive/spark/udf_in_file.q.out
    index c5b6205..5ba62b0 100644
    --- a/ql/src/test/results/clientpositive/spark/udf_in_file.q.out
    +++ b/ql/src/test/results/clientpositive/spark/udf_in_file.q.out
    @@ -59,7 +59,7 @@ STAGE PLANS:
                alias: value_src
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                Select Operator
    - expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean)
    + expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(null, '../../data/files/test2.dat') (type: boolean)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  Limit

    http://git-wip-us.apache.org/repos/asf/hive/blob/66acd266/ql/src/test/results/clientpositive/udf_in_file.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/udf_in_file.q.out b/ql/src/test/results/clientpositive/udf_in_file.q.out
    index b9d0d25..6d2c84b 100644
    --- a/ql/src/test/results/clientpositive/udf_in_file.q.out
    +++ b/ql/src/test/results/clientpositive/udf_in_file.q.out
    @@ -59,7 +59,7 @@ STAGE PLANS:
                  alias: value_src
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
    - expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(UDFToString(null), '../../data/files/test2.dat') (type: boolean)
    + expressions: in_file(str_val, '../../data/files/test2.dat') (type: boolean), in_file(ch_val, '../../data/files/test2.dat') (type: boolean), in_file(vch_val, '../../data/files/test2.dat') (type: boolean), in_file(str_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(ch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file(vch_val_neg, '../../data/files/test2.dat') (type: boolean), in_file('303', '../../data/files/test2.dat') (type: boolean), in_file('304', '../../data/files/test2.dat') (type: boolean), in_file(null, '../../data/files/test2.dat') (type: boolean)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                    Limit

    http://git-wip-us.apache.org/repos/asf/hive/blob/66acd266/ql/src/test/results/clientpositive/udf_nondeterministic.q.out
    ----------------------------------------------------------------------
    diff --git a/ql/src/test/results/clientpositive/udf_nondeterministic.q.out b/ql/src/test/results/clientpositive/udf_nondeterministic.q.out
    new file mode 100644
    index 0000000..eef5555
    --- /dev/null
    +++ b/ql/src/test/results/clientpositive/udf_nondeterministic.q.out
    @@ -0,0 +1,120 @@
    +PREHOOK: query: drop table if exists udf_tb1
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table if exists udf_tb1
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: drop table if exists udf_tb2
    +PREHOOK: type: DROPTABLE
    +POSTHOOK: query: drop table if exists udf_tb2
    +POSTHOOK: type: DROPTABLE
    +PREHOOK: query: create table udf_tb1 (year int, month int)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@udf_tb1
    +POSTHOOK: query: create table udf_tb1 (year int, month int)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@udf_tb1
    +PREHOOK: query: create table udf_tb2(month int)
    +PREHOOK: type: CREATETABLE
    +PREHOOK: Output: database:default
    +PREHOOK: Output: default@udf_tb2
    +POSTHOOK: query: create table udf_tb2(month int)
    +POSTHOOK: type: CREATETABLE
    +POSTHOOK: Output: database:default
    +POSTHOOK: Output: default@udf_tb2
    +PREHOOK: query: insert into udf_tb1 values(2001, 11)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__1
    +PREHOOK: Output: default@udf_tb1
    +POSTHOOK: query: insert into udf_tb1 values(2001, 11)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__1
    +POSTHOOK: Output: default@udf_tb1
    +POSTHOOK: Lineage: udf_tb1.month EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
    +POSTHOOK: Lineage: udf_tb1.year EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +PREHOOK: query: insert into udf_tb2 values(11)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@values__tmp__table__2
    +PREHOOK: Output: default@udf_tb2
    +POSTHOOK: query: insert into udf_tb2 values(11)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@values__tmp__table__2
    +POSTHOOK: Output: default@udf_tb2
    +POSTHOOK: Lineage: udf_tb2.month EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
    +PREHOOK: query: explain
    +select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month)
    +PREHOOK: type: QUERY
    +POSTHOOK: query: explain
    +select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month)
    +POSTHOOK: type: QUERY
    +STAGE DEPENDENCIES:
    +  Stage-1 is a root stage
    +  Stage-0 depends on stages: Stage-1
    +
    +STAGE PLANS:
    +  Stage: Stage-1
    +    Map Reduce
    +      Map Operator Tree:
    +          TableScan
    +            alias: udf_tb1
    +            Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +            Filter Operator
    +              predicate: ((year = 2001) and month is not null) (type: boolean)
    +              Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +              Select Operator
    +                expressions: month (type: int)
    +                outputColumnNames: _col1
    +                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +                Reduce Output Operator
    +                  key expressions: _col1 (type: int)
    +                  sort order: +
    +                  Map-reduce partition columns: _col1 (type: int)
    +                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +          TableScan
    +            alias: b
    +            Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
    +            Filter Operator
    +              predicate: month is not null (type: boolean)
    +              Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
    +              Reduce Output Operator
    +                key expressions: month (type: int)
    +                sort order: +
    +                Map-reduce partition columns: month (type: int)
    +                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
    +      Reduce Operator Tree:
    +        Join Operator
    +          condition map:
    +               Inner Join 0 to 1
    +          keys:
    +            0 _col1 (type: int)
    +            1 month (type: int)
    +          Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +          Select Operator
    +            expressions: unix_timestamp('2001-01-01 00:00:00') (type: bigint)
    +            outputColumnNames: _col0
    +            Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +            File Output Operator
    +              compressed: false
    +              Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
    +              table:
    +                  input format: org.apache.hadoop.mapred.TextInputFormat
    +                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
    +                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
    +
    +  Stage: Stage-0
    +    Fetch Operator
    +      limit: -1
    +      Processor Tree:
    +        ListSink
    +
    +PREHOOK: query: select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month)
    +PREHOOK: type: QUERY
    +PREHOOK: Input: default@udf_tb1
    +PREHOOK: Input: default@udf_tb2
    +#### A masked pattern was here ####
    +POSTHOOK: query: select unix_timestamp(concat(a.year, '-01-01 00:00:00')) from (select * from udf_tb1 where year=2001) a join udf_tb2 b on (a.month=b.month)
    +POSTHOOK: type: QUERY
    +POSTHOOK: Input: default@udf_tb1
    +POSTHOOK: Input: default@udf_tb2
    +#### A masked pattern was here ####
    +978336000
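
    As a sanity check on the final value: 978336000 corresponds to 2001-01-01 00:00:00 in US Pacific time (UTC-8 in January), which is presumably the timezone the golden file was generated in. A small verification sketch:

      import java.time.LocalDateTime;
      import java.time.ZoneId;

      // Recomputes the expected epoch second; assumes the q.out above was
      // generated with the JVM default timezone set to America/Los_Angeles.
      public class UnixTimestampCheck {
        public static void main(String[] args) {
          long epochSecond = LocalDateTime.parse("2001-01-01T00:00:00")
              .atZone(ZoneId.of("America/Los_Angeles"))
              .toEpochSecond();
          System.out.println(epochSecond); // 978336000
        }
      }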
