FAQ
Repository: hive
Updated Branches:
   refs/heads/master b1c45029e -> e384b2b65


HIVE-12531 : Implement fast-path for Year/Month UDFs for dates between 1999 and 2038 (Jason Dere via Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e384b2b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e384b2b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e384b2b6

Branch: refs/heads/master
Commit: e384b2b657c819d5963b8f76222f78bb479a29a2
Parents: b75d9ea
Author: Jason Dere <jdere@hortonworks.com>
Authored: Wed Dec 9 11:48:00 2015 -0800
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Fri Mar 25 07:21:55 2016 -0700

----------------------------------------------------------------------
  .../org/apache/hadoop/hive/ql/udf/UDFMonth.java | 16 ++++++++--------
  .../java/org/apache/hadoop/hive/ql/udf/UDFYear.java | 16 ++++++++--------
  .../expressions/TestVectorDateExpressions.java | 13 ++++++++++---
  3 files changed, 26 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
index 8c2b0e4..05afb8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
@@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf;

  import java.text.ParseException;
  import java.text.SimpleDateFormat;
-import java.util.Calendar;
  import java.util.Date;
+import org.joda.time.MutableDateTime;

  import org.apache.hadoop.hive.ql.exec.Description;
  import org.apache.hadoop.hive.ql.exec.UDF;
@@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text;
  @NDV(maxNdv = 31)
  public class UDFMonth extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
- private final Calendar calendar = Calendar.getInstance();
+ private transient final MutableDateTime mdt = new MutableDateTime();

    private final IntWritable result = new IntWritable();

@@ -75,8 +75,8 @@ public class UDFMonth extends UDF {
      }
      try {
        Date date = formatter.parse(dateString.toString());
- calendar.setTime(date);
- result.set(1 + calendar.get(Calendar.MONTH));
+ mdt.setMillis(date.getTime());
+ result.set(mdt.getMonthOfYear());
        return result;
      } catch (ParseException e) {
        return null;
@@ -88,8 +88,8 @@ public class UDFMonth extends UDF {
        return null;
      }

- calendar.setTime(d.get());
- result.set(1 + calendar.get(Calendar.MONTH));
+ mdt.setMillis(d.get().getTime());
+ result.set(mdt.getMonthOfYear());
      return result;
    }

@@ -98,8 +98,8 @@ public class UDFMonth extends UDF {
        return null;
      }

- calendar.setTime(t.getTimestamp());
- result.set(1 + calendar.get(Calendar.MONTH));
+ mdt.setMillis(t.getTimestamp().getTime());
+ result.set(mdt.getMonthOfYear());
      return result;
    }


http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
index d7ecd8c..fb3a655 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
@@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf;

  import java.text.ParseException;
  import java.text.SimpleDateFormat;
-import java.util.Calendar;
  import java.util.Date;
+import org.joda.time.MutableDateTime;

  import org.apache.hadoop.hive.ql.exec.Description;
  import org.apache.hadoop.hive.ql.exec.UDF;
@@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text;
  @NDV(maxNdv = 20) // although technically its unbounded, its unlikely we will ever see ndv > 20
  public class UDFYear extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
- private final Calendar calendar = Calendar.getInstance();
+ private transient final MutableDateTime mdt = new MutableDateTime();

    private final IntWritable result = new IntWritable();

@@ -77,8 +77,8 @@ public class UDFYear extends UDF {

      try {
        Date date = formatter.parse(dateString.toString());
- calendar.setTime(date);
- result.set(calendar.get(Calendar.YEAR));
+ mdt.setMillis(date.getTime());
+ result.set(mdt.getYear());
        return result;
      } catch (ParseException e) {
        return null;
@@ -90,8 +90,8 @@ public class UDFYear extends UDF {
        return null;
      }

- calendar.setTime(d.get());
- result.set(calendar.get(Calendar.YEAR));
+ mdt.setMillis(d.get().getTime());
+ result.set(mdt.getYear());
      return result;
    }

@@ -100,8 +100,8 @@ public class UDFYear extends UDF {
        return null;
      }

- calendar.setTime(t.getTimestamp());
- result.set(calendar.get(Calendar.YEAR));
+ mdt.setMillis(t.getTimestamp().getTime());
+ result.set(mdt.getYear());
      return result;
    }


http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
index 58cecc1..61c96e9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
@@ -38,7 +38,9 @@ import org.junit.Test;
  import org.junit.internal.runners.statements.Fail;

  import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.sun.tools.javac.resources.javac;

+import java.sql.Date;
  import java.sql.Timestamp;
  import java.util.ArrayList;
  import java.util.Calendar;
@@ -53,6 +55,7 @@ import java.util.concurrent.ThreadFactory;
  public class TestVectorDateExpressions {

    private ExecutorService runner;
+ private static final int MAX_SANE_DATE_VALUE = new DateWritable(Date.valueOf("3000-01-01")).getDays();

    /* copied over from VectorUDFTimestampFieldLong */
    private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
@@ -78,11 +81,15 @@ public class TestVectorDateExpressions {
    }

    private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) {
+ return getVectorizedRandomRowBatch(seed, size, Integer.MAX_VALUE);
+ }
+
+ private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, int maxValue) {
      VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
      LongColumnVector lcv = new LongColumnVector(size);
      Random rand = new Random(seed);
      for (int i = 0; i < size; i++) {
- lcv.vector[i] = (rand.nextInt());
+ lcv.vector[i] = (rand.nextInt(maxValue));
      }
      batch.cols[0] = lcv;
      batch.cols[1] = new LongColumnVector(size);
@@ -159,7 +166,7 @@ public class TestVectorDateExpressions {
      batch.cols[0].isNull[0] = true;
      verifyUDFYear(batch);

- batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE);
      verifyUDFYear(batch);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
      verifyUDFYear(batch);
@@ -283,7 +290,7 @@ public class TestVectorDateExpressions {
      batch.cols[0].isNull[0] = true;
      verifyUDFMonth(batch);

- batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE);
      verifyUDFMonth(batch);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
      verifyUDFMonth(batch);

Search Discussions

  • Hashutosh on Mar 25, 2016 at 2:22 pm
    HIVE-12552 : Wrong number of reducer estimation causing job to fail (Rajesh Balamohan via Gunther Hagleitner)


    Project: http://git-wip-us.apache.org/repos/asf/hive/repo
    Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b75d9ea8
    Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b75d9ea8
    Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b75d9ea8

    Branch: refs/heads/master
    Commit: b75d9ea8a73f85d1420f8e3ba1e3b8f9b9acdc5e
    Parents: b1c4502
    Author: Rajesh Balamohan <rba...@...apache dot org>
    Authored: Wed Dec 9 11:48:00 2015 -0800
    Committer: Ashutosh Chauhan <hashutosh@apache.org>
    Committed: Fri Mar 25 07:21:55 2016 -0700

    ----------------------------------------------------------------------
      ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java | 3 ++-
      1 file changed, 2 insertions(+), 1 deletion(-)
    ----------------------------------------------------------------------


    http://git-wip-us.apache.org/repos/asf/hive/blob/b75d9ea8/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
    ----------------------------------------------------------------------
    diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
    index 8eab3af..d5a2eca 100644
    --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
    +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
    @@ -119,7 +119,8 @@ public class GenTezUtils {

            // max we allow tez to pick
            int maxPartition = (int) (reduceSink.getConf().getNumReducers() * maxPartitionFactor);
    - maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;
    + maxPartition = Math.max(1, (maxPartition > maxReducers) ? maxReducers :
    + maxPartition);

            reduceWork.setMinReduceTasks(minPartition);
            reduceWork.setMaxReduceTasks(maxPartition);

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Mar 25, '16 at 2:22p
active: Mar 25, '16 at 2:22p
posts: 2
users: 1
website: hive.apache.org

1 user in discussion

Hashutosh: 2 posts

People

Translate

site design / logo © 2021 Grokbase