FAQ
Repository: hive
Updated Branches:
   refs/heads/master 8d524e062 -> 7201c264a


HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79244ab4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79244ab4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79244ab4

Branch: refs/heads/master
Commit: 79244ab453823b8787b70a08f923e25c2abbd0bf
Parents: 8d524e0
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Sep 17 17:46:55 2015 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Thu Sep 17 17:46:55 2015 +0100

----------------------------------------------------------------------
  .../apache/hadoop/hive/ql/lib/RuleRegExp.java | 61 ++++++++++++++++----
  1 file changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/79244ab4/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
index fd5f133..1e850d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
@@ -19,7 +19,9 @@
  package org.apache.hadoop.hive.ql.lib;

  import java.util.Arrays;
+import java.util.HashMap;
  import java.util.HashSet;
+import java.util.Map;
  import java.util.Set;
  import java.util.Stack;
  import java.util.regex.Matcher;
@@ -125,6 +127,12 @@ public class RuleRegExp implements Rule {
     */
    private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
      int numElems = (stack != null ? stack.size() : 0);
+
+ // No elements
+ if (numElems == 0) {
+ return -1;
+ }
+
      int patLen = patternWithoutWildCardChar.length();
      StringBuilder name = new StringBuilder(patLen + numElems);
      for (int pos = numElems - 1; pos >= 0; pos--) {
@@ -133,9 +141,8 @@ public class RuleRegExp implements Rule {
        if (name.length() >= patLen) {
          if (patternWithoutWildCardChar.contentEquals(name)) {
            return patLen;
- } else {
- return -1;
          }
+ break;
        }
      }
      return -1;
@@ -152,20 +159,54 @@ public class RuleRegExp implements Rule {
     */
    private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
      int numElems = (stack != null ? stack.size() : 0);
+
+ // No elements
+ if (numElems == 0) {
+ return -1;
+ }
+
+ // These DS are used to cache previously created String
+ Map<Integer,String> cachedNames = new HashMap<Integer,String>();
+ int maxDepth = numElems;
+ int maxLength = 0;
+
+ // For every pattern
      for (String pattern : patternORWildChar) {
        int patLen = pattern.length();

- StringBuilder name = new StringBuilder(patLen + numElems);
- for (int pos = numElems - 1; pos >= 0; pos--) {
- String nodeName = stack.get(pos).getName() + "%";
- name.insert(0, nodeName);
- if (name.length() >= patLen) {
- if (pattern.contentEquals(name)) {
- return patLen;
- } else {
+ // If the stack has been explored already till that level,
+ // obtained cached String
+ if (cachedNames.containsKey(patLen)) {
+ if (pattern.contentEquals(cachedNames.get(patLen))) {
+ return patLen;
+ }
+ } else if (maxLength >= patLen) {
+ // We have already explored the stack deep enough, but
+ // we do not have a matching
+ continue;
+ } else {
+ // We are going to build the name
+ StringBuilder name = new StringBuilder(patLen + numElems);
+ if (maxLength != 0) {
+ name.append(cachedNames.get(maxLength));
+ }
+ for (int pos = maxDepth - 1; pos >= 0; pos--) {
+ String nodeName = stack.get(pos).getName() + "%";
+ name.insert(0, nodeName);
+
+ // We cache the values
+ cachedNames.put(name.length(), name.toString());
+ maxLength = name.length();
+ maxDepth--;
+
+ if (name.length() >= patLen) {
+ if (pattern.contentEquals(name)) {
+ return patLen;
+ }
              break;
            }
          }
+
        }
      }
      return -1;

Search Discussions

  • Jcamacho at Sep 17, 2015 at 4:48 pm
    HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7201c264
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7201c264
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7201c264

    Branch: refs/heads/master
    Commit: 7201c264a1fe8347fd87fc8c1bb835083e9aac75
    Parents: 79244ab
    Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Authored: Thu Sep 17 17:48:01 2015 +0100
    Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
    Committed: Thu Sep 17 17:48:01 2015 +0100

    ----------------------------------------------------------------------
      .../calcite/reloperators/HiveBetween.java | 75 ++++++++++++++++++++
      .../optimizer/calcite/reloperators/HiveIn.java | 41 +++++++++++
      .../calcite/rules/HivePreFilteringRule.java | 37 +++-------
      .../translator/SqlFunctionConverter.java | 16 ++++-
      4 files changed, 142 insertions(+), 27 deletions(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
    new file mode 100644
    index 0000000..2388939
    --- /dev/null
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
    @@ -0,0 +1,75 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
    +
    +import org.apache.calcite.rel.type.RelDataType;
    +import org.apache.calcite.rel.type.RelDataTypeFactory;
    +import org.apache.calcite.sql.SqlCallBinding;
    +import org.apache.calcite.sql.SqlKind;
    +import org.apache.calcite.sql.SqlNode;
    +import org.apache.calcite.sql.SqlSpecialOperator;
    +import org.apache.calcite.sql.type.ReturnTypes;
    +import org.apache.calcite.sql.type.SqlOperandTypeInference;
    +import org.apache.calcite.sql.type.SqlTypeName;
    +
    +public class HiveBetween extends SqlSpecialOperator {
    +
    + public static final SqlSpecialOperator INSTANCE =
    + new HiveBetween();
    +
    + private HiveBetween() {
    + super(
    + "BETWEEN",
    + SqlKind.BETWEEN,
    + 30,
    + true,
    + ReturnTypes.BOOLEAN_NULLABLE,
    + FIRST_BOOLEAN_THEN_FIRST_KNOWN,
    + null);
    + }
    +
    + /**
    + * Operand type-inference strategy where an unknown operand type is derived
    + * from the first operand with a known type, but the first operand is a boolean.
    + */
    + public static final SqlOperandTypeInference FIRST_BOOLEAN_THEN_FIRST_KNOWN =
    + new SqlOperandTypeInference() {
    + public void inferOperandTypes(
    + SqlCallBinding callBinding,
    + RelDataType returnType,
    + RelDataType[] operandTypes) {
    + final RelDataType unknownType =
    + callBinding.getValidator().getUnknownType();
    + RelDataType knownType = unknownType;
    + for (int i = 1; i < callBinding.getCall().getOperandList().size(); i++) {
    + SqlNode operand = callBinding.getCall().getOperandList().get(i);
    + knownType = callBinding.getValidator().deriveType(
    + callBinding.getScope(), operand);
    + if (!knownType.equals(unknownType)) {
    + break;
    + }
    + }
    +
    + RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
    + operandTypes[0] = typeFactory.createSqlType(SqlTypeName.BOOLEAN);
    + for (int i = 1; i < operandTypes.length; ++i) {
    + operandTypes[i] = knownType;
    + }
    + }
    + };
    +}

    http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
    new file mode 100644
    index 0000000..6d87003
    --- /dev/null
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
    @@ -0,0 +1,41 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License. You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
    +
    +import org.apache.calcite.sql.SqlKind;
    +import org.apache.calcite.sql.SqlSpecialOperator;
    +import org.apache.calcite.sql.type.InferTypes;
    +import org.apache.calcite.sql.type.ReturnTypes;
    +
    +public class HiveIn extends SqlSpecialOperator {
    +
    + public static final SqlSpecialOperator INSTANCE =
    + new HiveIn();
    +
    + private HiveIn() {
    + super(
    + "IN",
    + SqlKind.IN,
    + 30,
    + true,
    + ReturnTypes.BOOLEAN_NULLABLE,
    + InferTypes.FIRST_KNOWN,
    + null);
    + }
    +
    +}

    http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
    index dde6288..3e2311c 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
    @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

      import java.util.ArrayList;
      import java.util.Collection;
    +import java.util.EnumSet;
      import java.util.List;
      import java.util.Map.Entry;
      import java.util.Set;
    @@ -41,22 +42,11 @@ import org.apache.calcite.rex.RexUtil;
      import org.apache.calcite.sql.SqlKind;
      import org.apache.commons.logging.Log;
      import org.apache.commons.logging.LogFactory;
    -import org.apache.hadoop.hive.ql.exec.Description;
      import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
    -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;

      import com.google.common.collect.ImmutableList;
      import com.google.common.collect.LinkedHashMultimap;
      import com.google.common.collect.Multimap;
    -import com.google.common.collect.Sets;


      public class HivePreFilteringRule extends RelOptRule {
    @@ -71,18 +61,13 @@ public class HivePreFilteringRule extends RelOptRule {
        private final FilterFactory filterFactory;


    - private static final Set<String> COMPARISON_UDFS = Sets.newHashSet(
    - GenericUDFOPEqual.class.getAnnotation(Description.class).name(),
    - GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(),
    - GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(),
    - GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(),
    - GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(),
    - GenericUDFOPLessThan.class.getAnnotation(Description.class).name(),
    - GenericUDFOPNotEqual.class.getAnnotation(Description.class).name());
    - private static final String IN_UDF =
    - GenericUDFIn.class.getAnnotation(Description.class).name();
    - private static final String BETWEEN_UDF =
    - GenericUDFBetween.class.getAnnotation(Description.class).name();
    + private static final Set<SqlKind> COMPARISON = EnumSet.of(
    + SqlKind.EQUALS,
    + SqlKind.GREATER_THAN_OR_EQUAL,
    + SqlKind.LESS_THAN_OR_EQUAL,
    + SqlKind.GREATER_THAN,
    + SqlKind.LESS_THAN,
    + SqlKind.NOT_EQUALS);


        private HivePreFilteringRule() {
    @@ -176,7 +161,7 @@ public class HivePreFilteringRule extends RelOptRule {
                continue;
              }
              RexCall conjCall = (RexCall) conjunction;
    - if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) {
    + if(COMPARISON.contains(conjCall.getOperator().getKind())) {
                if (conjCall.operands.get(0) instanceof RexInputRef &&
                        conjCall.operands.get(1) instanceof RexLiteral) {
                  reductionCondition.put(conjCall.operands.get(0).toString(),
    @@ -188,11 +173,11 @@ public class HivePreFilteringRule extends RelOptRule {
                          conjCall);
                  addedToReductionCondition = true;
                }
    - } else if(conjCall.getOperator().getName().equals(IN_UDF)) {
    + } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
                reductionCondition.put(conjCall.operands.get(0).toString(),
                        conjCall);
                addedToReductionCondition = true;
    - } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) {
    + } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
                reductionCondition.put(conjCall.operands.get(1).toString(),
                        conjCall);
                addedToReductionCondition = true;

    http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
    index 219289c..fd78824 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
    @@ -45,6 +45,8 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
      import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
      import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
      import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
    +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
    +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
      import org.apache.hadoop.hive.ql.parse.ASTNode;
      import org.apache.hadoop.hive.ql.parse.HiveParser;
      import org.apache.hadoop.hive.ql.parse.ParseDriver;
    @@ -193,7 +195,16 @@ public class SqlFunctionConverter {
          HiveToken hToken = calciteToHiveToken.get(op);
          ASTNode node;
          if (hToken != null) {
    - node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
    + switch (op.kind) {
    + case IN:
    + case BETWEEN:
    + case ROW:
    + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
    + node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
    + break;
    + default:
    + node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
    + }
          } else {
            node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
            if (op.kind != SqlKind.CAST) {
    @@ -296,6 +307,9 @@ public class SqlFunctionConverter {
                hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
            registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
            registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
    + registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
    + registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
    + registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
          }

          private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Sep 17, '15 at 4:48p
active: Sep 17, '15 at 4:48p
posts: 2
users: 1
website: hive.apache.org

1 user in discussion

Jcamacho: 2 posts

People

Translate

site design / logo © 2021 Grokbase