Author: ehans
Date: Mon Dec 2 20:48:28 2013
New Revision: 1547185

URL: http://svn.apache.org/r1547185
Log:
HIVE-5581: Implement vectorized year/month/day... etc. for string arguments (Teddy Choi via Eric Hanson)

Added:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java
     hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
     hive/trunk/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
Modified:
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
     hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
     hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Expression to get day of month.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFDayOfMonthString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFDayOfMonthString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 8, 2);
+ }
+
+ public VectorUDFDayOfMonthString() {
+ super();
+ }
+}
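
The new subclasses in this patch (day-of-month above; hour, minute, month, second, and year below) differ only in the (fieldStart, fieldLength) pair they hand to VectorUDFTimestampFieldString, that is, a fixed character offset into the 'yyyy-MM-dd HH:mm:ss.fffffffff' layout. A minimal standalone sketch, not part of the patch (the class name TimestampFieldOffsets is invented for illustration), tabulating those offsets and mimicking the digit extraction:

    // Offsets passed by the new subclasses, against "yyyy-MM-dd HH:mm:ss.fffffffff":
    //   year -> (0, 4), month -> (5, 2), day -> (8, 2),
    //   hour -> (11, 2), minute -> (14, 2), second -> (17, 2)
    public final class TimestampFieldOffsets {
      private TimestampFieldOffsets() {}

      /** Accumulates the digits of one fixed-width field, as doGetField() does. */
      public static int extract(String timestamp, int fieldStart, int fieldLength) {
        int field = 0;
        for (int i = fieldStart; i < fieldStart + fieldLength; i++) {
          field = 10 * field + (timestamp.charAt(i) - '0');
        }
        return field;
      }

      public static void main(String[] args) {
        String ts = "2013-12-02 20:48:28.000000000";
        System.out.println(extract(ts, 8, 2));  // 2    (day of month)
        System.out.println(extract(ts, 0, 4));  // 2013 (year)
      }
    }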

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Returns hour of day.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFHourString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFHourString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 11, 2);
+ }
+
+ public VectorUDFHourString() {
+ super();
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Returns minute value.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFMinuteString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFMinuteString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 14, 2);
+ }
+
+ public VectorUDFMinuteString() {
+ super();
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Returns month value.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFMonthString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFMonthString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 5, 2);
+ }
+
+ public VectorUDFMonthString() {
+ super();
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Expression to get seconds.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFSecondString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFSecondString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 17, 2);
+ }
+
+ public VectorUDFSecondString() {
+ super();
+ }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.text.ParseException;
+
+/**
+ * Abstract class to return various fields from a String.
+ */
+public abstract class VectorUDFTimestampFieldString extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ protected int colNum;
+ protected int outputColumn;
+ protected final int fieldStart;
+ protected final int fieldLength;
+ private static final String patternMin = "0000-00-00 00:00:00.000000000";
+ private static final String patternMax = "9999-19-99 29:59:59.999999999";
+
+ public VectorUDFTimestampFieldString(int colNum, int outputColumn, int fieldStart, int fieldLength) {
+ this.colNum = colNum;
+ this.outputColumn = outputColumn;
+ this.fieldStart = fieldStart;
+ this.fieldLength = fieldLength;
+ }
+
+ public VectorUDFTimestampFieldString() {
+ fieldStart = -1;
+ fieldLength = -1;
+ }
+
+ private long getField(byte[] bytes, int start, int length) throws ParseException {
+ // Validate
+ for (int i = 0; i < length; i++) {
+ char ch = (char) bytes[start + i];
+ if (ch < patternMin.charAt(i) || ch > patternMax.charAt(i)) {
+ throw new ParseException("A timestamp string should match 'yyyy-MM-dd HH:mm:ss.fffffffff' pattern.", i);
+ }
+ }
+
+ return doGetField(bytes, start, length);
+ }
+
+ protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
+ int field = 0;
+ if (length < fieldLength) {
+ throw new ParseException("A timestamp string should be longer.", 0);
+ }
+ for (int i = fieldStart; i < fieldStart + fieldLength; i++) {
+ byte ch = bytes[start + i];
+ field = 10 * field + (ch - '0');
+ }
+ return field;
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+ BytesColumnVector inputCol = (BytesColumnVector)batch.cols[this.colNum];
+
+ final int n = inputCol.isRepeating ? 1 : batch.size;
+ int[] sel = batch.selected;
+
+ if (batch.size == 0) {
+
+ // n != batch.size when isRepeating
+ return;
+ }
+
+ // true for all algebraic UDFs with no state
+ outV.isRepeating = inputCol.isRepeating;
+
+ if (inputCol.noNulls) {
+ outV.noNulls = true;
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ try {
+ outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+ outV.isNull[i] = false;
+ } catch (ParseException e) {
+ outV.noNulls = false;
+ outV.isNull[i] = true;
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ try {
+ outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+ outV.isNull[i] = false;
+ } catch (ParseException e) {
+ outV.noNulls = false;
+ outV.isNull[i] = true;
+ }
+ }
+ }
+ } else {
+
+ // Handle case with nulls. Don't do function if the value is null, to save time,
+ // because calling the function can be expensive.
+ outV.noNulls = false;
+ if (batch.selectedInUse) {
+ for (int j = 0; j < n; j++) {
+ int i = sel[j];
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ try {
+ outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+ } catch (ParseException e) {
+ outV.isNull[i] = true;
+ }
+ }
+ }
+ } else {
+ for (int i = 0; i < n; i++) {
+ outV.isNull[i] = inputCol.isNull[i];
+ if (!inputCol.isNull[i]) {
+ try {
+ outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+ } catch (ParseException e) {
+ outV.isNull[i] = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getOutputColumn() {
+ return this.outputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "long";
+ }
+
+ public int getColNum() {
+ return colNum;
+ }
+
+ public void setColNum(int colNum) {
+ this.colNum = colNum;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+ b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(1)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.STRING)
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN);
+ return b.build();
+ }
+}
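
VectorUDFTimestampFieldString validates every character of the input against the per-position bounds patternMin/patternMax before extracting digits, and evaluate() turns a ParseException into a NULL output rather than failing the batch. The sketch below (FieldStringSketch is a hypothetical name, not part of the patch) reproduces that validate-then-extract path outside of the vectorized batch machinery:

    import java.nio.charset.StandardCharsets;
    import java.text.ParseException;

    // Hypothetical, self-contained sketch of the per-character validation in
    // VectorUDFTimestampFieldString: each byte is range-checked against the
    // matching character of patternMin/patternMax, then the requested field is
    // accumulated digit by digit.
    public final class FieldStringSketch {
      private static final String PATTERN_MIN = "0000-00-00 00:00:00.000000000";
      private static final String PATTERN_MAX = "9999-19-99 29:59:59.999999999";

      public static long getField(byte[] bytes, int start, int length,
                                  int fieldStart, int fieldLength) throws ParseException {
        for (int i = 0; i < length; i++) {
          char ch = (char) bytes[start + i];
          if (ch < PATTERN_MIN.charAt(i) || ch > PATTERN_MAX.charAt(i)) {
            throw new ParseException("not a yyyy-MM-dd HH:mm:ss.fffffffff string", i);
          }
        }
        if (length < fieldLength) {
          throw new ParseException("input too short for the requested field", 0);
        }
        long field = 0;
        for (int i = fieldStart; i < fieldStart + fieldLength; i++) {
          field = 10 * field + (bytes[start + i] - '0');
        }
        return field;
      }

      public static void main(String[] args) throws ParseException {
        byte[] ts = "2013-12-02 20:48:28".getBytes(StandardCharsets.US_ASCII);
        System.out.println(getField(ts, 0, ts.length, 5, 2)); // 12 (month)
      }
    }

Note that the check is per character only, so a month of "19" still passes the bounds and is extracted as-is. In evaluate(), the exception is caught per row and the output slot is marked null (with noNulls cleared), so a malformed string yields NULL instead of aborting the whole VectorizedRowBatch.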

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.io.Text;
+
+import java.nio.charset.CharacterCodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+
+/**
+ * Return Unix Timestamp.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ private transient final Calendar calendar = Calendar.getInstance();
+
+ public VectorUDFUnixTimeStampString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, -1, -1);
+ }
+
+ public VectorUDFUnixTimeStampString() {
+ super();
+ }
+
+ @Override
+ protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
+ Date date = null;
+ try {
+ date = format.parse(Text.decode(bytes, start, length));
+ } catch (CharacterCodingException e) {
+ throw new ParseException(e.getMessage(), 0);
+ }
+ calendar.setTime(date);
+ return calendar.getTimeInMillis() / 1000;
+ }
+}
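
Unlike the fixed-offset subclasses, VectorUDFUnixTimeStampString passes (-1, -1) for the field bounds and overrides doGetField() to parse the whole value with SimpleDateFormat, then divides milliseconds by 1000. A standalone sketch of that conversion (UnixTimestampSketch is an invented name; like the committed code, it relies on the JVM's default time zone):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;

    // Hypothetical sketch of the string -> unix-timestamp conversion used by
    // VectorUDFUnixTimeStampString: parse "yyyy-MM-dd HH:mm:ss", then divide
    // milliseconds by 1000 to get seconds since the epoch.
    public final class UnixTimestampSketch {
      private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
      private final Calendar calendar = Calendar.getInstance();

      public long toUnixTimestamp(String s) throws ParseException {
        Date date = format.parse(s);
        calendar.setTime(date);
        return calendar.getTimeInMillis() / 1000;
      }

      public static void main(String[] args) throws ParseException {
        System.out.println(new UnixTimestampSketch().toUnixTimestamp("1970-01-01 00:00:10"));
        // prints 10 when the JVM default time zone is UTC
      }
    }

The base class still applies its per-character bounds check before doGetField() runs, so the parse only ever sees inputs that at least look like timestamps.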

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.io.Text;
+
+import java.nio.charset.CharacterCodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+
+/**
+ * Expression to get week of year.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFWeekOfYearString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
+ private transient final Calendar calendar = Calendar.getInstance();
+
+ public VectorUDFWeekOfYearString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, -1, -1);
+ initCalendar();
+ }
+
+ public VectorUDFWeekOfYearString() {
+ super();
+ initCalendar();
+ }
+
+ @Override
+ protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
+ Date date = null;
+ try {
+ String decoded = Text.decode(bytes, start, length);
+ date = format.parse(decoded);
+ } catch (CharacterCodingException e) {
+ throw new ParseException(e.getMessage(), 0);
+ }
+ calendar.setTime(date);
+ return calendar.get(Calendar.WEEK_OF_YEAR);
+ }
+
+ private void initCalendar() {
+
+ // code copied over from UDFWeekOfYear implementation
+ calendar.setFirstDayOfWeek(Calendar.MONDAY);
+ calendar.setMinimalDaysInFirstWeek(4);
+ }
+}
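
The week-of-year variant also parses with SimpleDateFormat, but first configures its Calendar the same way UDFWeekOfYear does (week starts on Monday, first week needs at least 4 days), which matches ISO-8601 week numbering. A minimal sketch (WeekOfYearSketch is an invented name) showing the effect of that configuration; the expected values come from the UDFWeekOfYear @Description examples modified later in this patch:

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;

    // Hypothetical sketch: WEEK_OF_YEAR with the Calendar settings the committed
    // code copies from UDFWeekOfYear (Monday as first day of week, minimal days
    // in first week = 4, i.e. ISO-8601 style week numbers).
    public final class WeekOfYearSketch {
      public static int weekOfYear(String yyyyMMdd) throws ParseException {
        Calendar calendar = Calendar.getInstance();
        calendar.setFirstDayOfWeek(Calendar.MONDAY);
        calendar.setMinimalDaysInFirstWeek(4);
        calendar.setTime(new SimpleDateFormat("yyyy-MM-dd").parse(yyyyMMdd));
        return calendar.get(Calendar.WEEK_OF_YEAR);
      }

      public static void main(String[] args) throws ParseException {
        System.out.println(weekOfYear("2008-02-20")); // 8
        System.out.println(weekOfYear("1980-12-31")); // 1 (falls into week 1 of 1981)
      }
    }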

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java Mon Dec 2 20:48:28 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Expression to get year as a long.
+ * Extends {@link VectorUDFTimestampFieldString}
+ */
+public final class VectorUDFYearString extends VectorUDFTimestampFieldString {
+
+ private static final long serialVersionUID = 1L;
+
+ public VectorUDFYearString(int colNum, int outputColumn) {
+ super(colNum, outputColumn, 0, 4);
+ }
+
+ public VectorUDFYearString() {
+ super();
+ }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Mon Dec 2 20:48:28 2013
@@ -97,6 +97,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
  import org.apache.hadoop.hive.ql.udf.UDFLog10;
  import org.apache.hadoop.hive.ql.udf.UDFLog2;
  import org.apache.hadoop.hive.ql.udf.UDFMinute;
+import org.apache.hadoop.hive.ql.udf.UDFMonth;
  import org.apache.hadoop.hive.ql.udf.UDFRTrim;
  import org.apache.hadoop.hive.ql.udf.UDFRadians;
  import org.apache.hadoop.hive.ql.udf.UDFRand;
@@ -199,15 +200,16 @@ public class Vectorizer implements Physi
      supportedGenericUDFs.add(GenericUDFOPOr.class);
      supportedGenericUDFs.add(GenericUDFOPAnd.class);
      supportedGenericUDFs.add(GenericUDFOPEqual.class);
- supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);
+ supportedGenericUDFs.add(UDFLength.class);

+ supportedGenericUDFs.add(UDFYear.class);
+ supportedGenericUDFs.add(UDFMonth.class);
+ supportedGenericUDFs.add(UDFDayOfMonth.class);
      supportedGenericUDFs.add(UDFHour.class);
- supportedGenericUDFs.add(UDFLength.class);
      supportedGenericUDFs.add(UDFMinute.class);
      supportedGenericUDFs.add(UDFSecond.class);
- supportedGenericUDFs.add(UDFYear.class);
      supportedGenericUDFs.add(UDFWeekOfYear.class);
- supportedGenericUDFs.add(UDFDayOfMonth.class);
+ supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);

      supportedGenericUDFs.add(UDFLike.class);
      supportedGenericUDFs.add(UDFRegExp.class);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthString;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
@@ -42,7 +43,7 @@ import org.apache.hadoop.io.Text;
      + "'yyyy-MM-dd'.\n"
      + "Example:\n "
      + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 30")
-@VectorizedExpressions({VectorUDFDayOfMonthLong.class})
+@VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class})
  public class UDFDayOfMonth extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar calendar = Calendar.getInstance();
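
The remaining UDF changes all follow the pattern shown here for UDFDayOfMonth: the string-input vectorized class is appended to the existing @VectorizedExpressions list, while Vectorizer (modified above) only needs the UDF class registered in supportedGenericUDFs. A hedged sketch of inspecting that annotation via reflection, assuming (as its use at planning time suggests) that it is retained at runtime; this is illustration only, not how Hive's planner is actually invoked:

    import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
    import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;

    // Illustrative only: after this patch, UDFDayOfMonth advertises both the
    // timestamp(long)-input and the string-input vectorized implementations.
    public final class ListVectorizedImpls {
      public static void main(String[] args) {
        // Non-null assuming the annotation has runtime retention.
        VectorizedExpressions ann =
            UDFDayOfMonth.class.getAnnotation(VectorizedExpressions.class);
        for (Class<?> impl : ann.value()) {
          System.out.println(impl.getSimpleName());
          // VectorUDFDayOfMonthLong
          // VectorUDFDayOfMonthString
        }
      }
    }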

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourString;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
@@ -43,7 +44,7 @@ import org.apache.hadoop.io.Text;
      + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n"
      + " 12\n"
      + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 12")
-@VectorizedExpressions({VectorUDFHourLong.class})
+@VectorizedExpressions({VectorUDFHourLong.class, VectorUDFHourString.class})
  public class UDFHour extends UDF {
    private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss");

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteString;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
@@ -43,7 +44,7 @@ import org.apache.hadoop.io.Text;
      + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n"
      + " 58\n"
      + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 58")
-@VectorizedExpressions({VectorUDFMinuteLong.class})
+@VectorizedExpressions({VectorUDFMinuteLong.class, VectorUDFMinuteString.class})
  public class UDFMinute extends UDF {
    private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss");

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java Mon Dec 2 20:48:28 2013
@@ -25,6 +25,9 @@ import java.util.Date;

  import org.apache.hadoop.hive.ql.exec.Description;
  import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
@@ -38,6 +41,7 @@ import org.apache.hadoop.io.Text;
      value = "_FUNC_(date) - Returns the month of date",
      extended = "Example:\n"
      + " > SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + " 7")
+@VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class})
  public class UDFMonth extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar calendar = Calendar.getInstance();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondString;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
@@ -43,7 +44,7 @@ import org.apache.hadoop.io.Text;
      + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n"
      + " 59\n"
      + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 59")
-@VectorizedExpressions({VectorUDFSecondLong.class})
+@VectorizedExpressions({VectorUDFSecondLong.class, VectorUDFSecondString.class})
  public class UDFSecond extends UDF {
    private final SimpleDateFormat formatter1 = new SimpleDateFormat(
        "yyyy-MM-dd HH:mm:ss");

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
@@ -43,7 +44,7 @@ import org.apache.hadoop.io.Text;
      + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n"
      + " 8\n"
      + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1")
-@VectorizedExpressions({VectorUDFWeekOfYearLong.class})
+@VectorizedExpressions({VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class})
  public class UDFWeekOfYear extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar calendar = Calendar.getInstance();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java Mon Dec 2 20:48:28 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.De
  import org.apache.hadoop.hive.ql.exec.UDF;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString;
  import org.apache.hadoop.hive.serde2.io.DateWritable;
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
@@ -42,7 +43,7 @@ import org.apache.hadoop.io.Text;
      + "'yyyy-MM-dd'.\n"
      + "Example:\n "
      + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 2009")
-@VectorizedExpressions({VectorUDFYearLong.class})
+@VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class})
  public class UDFYear extends UDF {
    private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar calendar = Calendar.getInstance();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java Mon Dec 2 20:48:28 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.UD
  import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampString;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
@@ -42,7 +43,7 @@ import org.apache.hadoop.io.LongWritable
  @Description(name = "to_unix_timestamp",
      value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp",
      extended = "Converts the specified time to number of seconds since 1970-01-01.")
-@VectorizedExpressions({VectorUDFUnixTimeStampLong.class})
+@VectorizedExpressions({VectorUDFUnixTimeStampLong.class, VectorUDFUnixTimeStampString.class})
  public class GenericUDFToUnixTimeStamp extends GenericUDF {

    private transient StringObjectInspector intputTextOI;
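
The test changes that follow parameterize the existing long-based tests over a TestType enum (LONG2 vs. STRING_LONG) and add encodeTime/decodeTime helpers that round-trip nanosecond timestamps through "yyyy-MM-dd HH:mm:ss" text, so the same verify methods can compare string-input results against the scalar UDFs. A compact sketch of that round-trip; it mirrors the encodeTime/decodeTime helpers in the diff below, with only the wrapper class name TimeRoundTrip invented:

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;

    // Sketch of the nanoseconds -> formatted string -> nanoseconds round-trip used
    // by the tests; it is exact only when the input is a whole number of seconds,
    // since the format carries no sub-second digits.
    public final class TimeRoundTrip {
      private static final SimpleDateFormat FORMAT =
          new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

      static String encodeTime(long nanos) {
        return FORMAT.format(new Date(nanos / (1000 * 1000)));
      }

      static long decodeTime(String text) throws ParseException {
        return FORMAT.parse(text).getTime() * 1000 * 1000;
      }

      public static void main(String[] args) throws ParseException {
        long nanos = 1386017308L * 1_000_000_000L;
        System.out.println(decodeTime(encodeTime(nanos)) == nanos); // true
      }
    }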

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java?rev=1547185&r1=1547184&r2=1547185&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java Mon Dec 2 20:48:28 2013
@@ -18,15 +18,22 @@

  package org.apache.hadoop.hive.ql.exec.vector.expressions;

+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
  import java.sql.Timestamp;
+import java.text.SimpleDateFormat;
  import java.util.ArrayList;
+import java.util.Arrays;
  import java.util.Calendar;
+import java.util.Date;
  import java.util.List;
  import java.util.Random;

  import junit.framework.Assert;

  import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -40,12 +47,14 @@ import org.apache.hadoop.hive.ql.udf.UDF
  import org.apache.hadoop.hive.serde2.io.TimestampWritable;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
  import org.junit.Test;

  /**
   * Unit tests for timestamp expressions.
   */
  public class TestVectorTimestampExpressions {
+ private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /* copied over from VectorUDFTimestampFieldLong */
    private TimestampWritable toTimestampWritable(long nanos) {
@@ -97,6 +106,34 @@ public class TestVectorTimestampExpressi
      return batch;
    }

+ private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int size) {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+ BytesColumnVector bcv = new BytesColumnVector(size);
+ Random rand = new Random(seed);
+ for (int i = 0; i < size; i++) {
+ /* all 32 bit numbers qualify & multiply up to get nano-seconds */
+ byte[] encoded = encodeTime(1000 * 1000 * 1000 * rand.nextInt());
+ bcv.vector[i] = encoded;
+ bcv.start[i] = 0;
+ bcv.length[i] = encoded.length;
+ }
+ batch.cols[0] = bcv;
+ batch.cols[1] = new LongColumnVector(size);
+ batch.size = size;
+ return batch;
+ }
+
+ private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, TestType testType) {
+ switch (testType) {
+ case LONG2:
+ return getVectorizedRandomRowBatchLong2(seed, size);
+ case STRING_LONG:
+ return getVectorizedRandomRowBatchStringLong(seed, size);
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+
    /*
     * Input array is used to fill the entire size of the vector row batch
     */
@@ -112,7 +149,83 @@ public class TestVectorTimestampExpressi
      return batch;
    }

- /*begin-macro*/
+ /*
+ * Input array is used to fill the entire size of the vector row batch
+ */
+ private VectorizedRowBatch getVectorizedRowBatchStringLong(long[] inputs, int size) {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+ BytesColumnVector bcv = new BytesColumnVector(size);
+ for (int i = 0; i < size; i++) {
+ byte[] encoded = encodeTime(inputs[i % inputs.length]);
+ bcv.vector[i] = encoded;
+ bcv.start[i] = 0;
+ bcv.length[i] = encoded.length;
+ }
+ batch.cols[0] = bcv;
+ batch.cols[1] = new LongColumnVector(size);
+ batch.size = size;
+ return batch;
+ }
+
+ private VectorizedRowBatch getVectorizedRowBatchStringLong(byte[] vector, int start, int length) {
+ VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+ BytesColumnVector bcv = new BytesColumnVector(1);
+
+ bcv.vector[0] = vector;
+ bcv.start[0] = start;
+ bcv.length[0] = length;
+
+ batch.cols[0] = bcv;
+ batch.cols[1] = new LongColumnVector(1);
+ batch.size = 1;
+ return batch;
+ }
+
+ private VectorizedRowBatch getVectorizedRowBatch(long[] inputs, int size, TestType testType) {
+ switch (testType) {
+ case LONG2:
+ return getVectorizedRowBatchLong2(inputs, size);
+ case STRING_LONG:
+ return getVectorizedRowBatchStringLong(inputs, size);
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+
+ private byte[] encodeTime(long time) {
+ ByteBuffer encoded;
+ try {
+ String formatted = dateFormat.format(new Date(time / (1000 * 1000)));
+ encoded = Text.encode(formatted);
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException(e);
+ }
+ return Arrays.copyOf(encoded.array(), encoded.limit());
+ }
+
+ private long decodeTime(byte[] time) {
+ try {
+ return dateFormat.parse(Text.decode(time)).getTime() * 1000 * 1000;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private long readVectorElementAt(ColumnVector col, int i) {
+ if (col instanceof LongColumnVector) {
+ return ((LongColumnVector) col).vector[i];
+ }
+ if (col instanceof BytesColumnVector) {
+ byte[] timeBytes = ((BytesColumnVector) col).vector[i];
+ return decodeTime(timeBytes);
+ }
+ throw new IllegalArgumentException();
+ }
+
+ private enum TestType {
+ LONG2, STRING_LONG
+ }
+
    private void compareToUDFYearLong(long t, int y) {
      UDFYear udf = new UDFYear();
      TimestampWritable tsw = toTimestampWritable(t);
@@ -120,20 +233,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFYearLong(VectorizedRowBatch batch) {
- /* col[1] = UDFYear(col[0]) */
- VectorUDFYearLong udf = new VectorUDFYearLong(0, 1);
+ private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf = null;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFYearLong(0, 1);
+ } else {
+ udf = new VectorUDFYearString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFYearLong(t, (int) y);
        } else {
@@ -142,40 +258,54 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFYearLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFYear(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFYearLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFYearLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFYearLong(batch);
+ verifyUDFYear(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFYearLong() {
+ testVectorUDFYear(TestType.LONG2);
    }
- /*end-macro*/

+ @Test
+ public void testVectorUDFYearString() {
+ testVectorUDFYear(TestType.STRING_LONG);
+
+ VectorizedRowBatch batch = getVectorizedRowBatchStringLong(new byte[] {'2', '2', '0', '1', '3'}, 1, 3);
+ VectorExpression udf = new VectorUDFYearString(0, 1);
+ udf.evaluate(batch);
+ LongColumnVector lcv = (LongColumnVector) batch.cols[1];
+ Assert.assertEquals(false, batch.cols[0].isNull[0]);
+ Assert.assertEquals(true, lcv.isNull[0]);
+ }

    private void compareToUDFDayOfMonthLong(long t, int y) {
      UDFDayOfMonth udf = new UDFDayOfMonth();
@@ -184,20 +314,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFDayOfMonthLong(VectorizedRowBatch batch) {
- /* col[1] = UDFDayOfMonth(col[0]) */
- VectorUDFDayOfMonthLong udf = new VectorUDFDayOfMonthLong(0, 1);
+ private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf = null;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFDayOfMonthLong(0, 1);
+ } else {
+ udf = new VectorUDFDayOfMonthString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFDayOfMonthLong(t, (int) y);
        } else {
@@ -206,37 +339,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFDayOfMonthLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFDayOfMonth(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFDayOfMonthLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFDayOfMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFDayOfMonthLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFDayOfMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFDayOfMonthLong(batch);
+ verifyUDFDayOfMonth(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFDayOfMonthLong() {
+ testVectorUDFDayOfMonth(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFDayOfMonthString() {
+ testVectorUDFDayOfMonth(TestType.STRING_LONG);
    }

    private void compareToUDFHourLong(long t, int y) {
@@ -246,20 +388,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFHourLong(VectorizedRowBatch batch) {
- /* col[1] = UDFHour(col[0]) */
- VectorUDFHourLong udf = new VectorUDFHourLong(0, 1);
+ private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf = null;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFHourLong(0, 1);
+ } else {
+ udf = new VectorUDFHourString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFHourLong(t, (int) y);
        } else {
@@ -268,37 +413,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFHourLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFHour(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFHourLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFHour(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFHourLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFHour(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFHourLong(batch);
+ verifyUDFHour(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFHourLong() {
+ testVectorUDFHour(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFHourString() {
+ testVectorUDFHour(TestType.STRING_LONG);
    }

    private void compareToUDFMinuteLong(long t, int y) {
@@ -308,20 +462,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFMinuteLong(VectorizedRowBatch batch) {
- /* col[1] = UDFMinute(col[0]) */
- VectorUDFMinuteLong udf = new VectorUDFMinuteLong(0, 1);
+ private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf = null;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFMinuteLong(0, 1);
+ } else {
+ udf = new VectorUDFMinuteString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFMinuteLong(t, (int) y);
        } else {
@@ -330,37 +487,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFMinuteLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFMinute(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFMinuteLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFMinute(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFMinuteLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFMinute(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFMinuteLong(batch);
+ verifyUDFMinute(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFMinuteLong() {
+ testVectorUDFMinute(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFMinuteString() {
+ testVectorUDFMinute(TestType.STRING_LONG);
    }

    private void compareToUDFMonthLong(long t, int y) {
@@ -370,20 +536,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFMonthLong(VectorizedRowBatch batch) {
- /* col[1] = UDFMonth(col[0]) */
- VectorUDFMonthLong udf = new VectorUDFMonthLong(0, 1);
+ private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFMonthLong(0, 1);
+ } else {
+ udf = new VectorUDFMonthString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFMonthLong(t, (int) y);
        } else {
@@ -392,37 +561,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFMonthLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFMonth(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFMonthLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFMonthLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFMonthLong(batch);
+ verifyUDFMonth(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFMonthLong() {
+ testVectorUDFMonth(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFMonthString() {
+ testVectorUDFMonth(TestType.STRING_LONG);
    }

    private void compareToUDFSecondLong(long t, int y) {
@@ -432,20 +610,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFSecondLong(VectorizedRowBatch batch) {
- /* col[1] = UDFSecond(col[0]) */
- VectorUDFSecondLong udf = new VectorUDFSecondLong(0, 1);
+ private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFSecondLong(0, 1);
+ } else {
+ udf = new VectorUDFSecondString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
          if (!batch.cols[in].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFSecondLong(t, (int) y);
        } else {
@@ -454,37 +635,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFSecondLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFSecond(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFSecondLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFSecond(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFSecondLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFSecond(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFSecondLong(batch);
+ verifyUDFSecond(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFSecondLong() {
+ testVectorUDFSecond(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFSecondString() {
+ testVectorUDFSecond(TestType.STRING_LONG);
    }

    private LongWritable getLongWritable(TimestampWritable i) {
@@ -508,20 +698,23 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFUnixTimeStampLong(VectorizedRowBatch batch) {
- /* col[1] = UDFUnixTimeStamp(col[0]) */
- VectorUDFUnixTimeStampLong udf = new VectorUDFUnixTimeStampLong(0, 1);
+ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFUnixTimeStampLong(0, 1);
+ } else {
+ udf = new VectorUDFUnixTimeStampString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
- if (!batch.cols[in].noNulls) {
+ if (!batch.cols[out].noNulls) {
            Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
          }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFUnixTimeStampLong(t, y);
        } else {
@@ -530,37 +723,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFUnixTimeStampLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFUnixTimeStamp(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFUnixTimeStampLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFUnixTimeStamp(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFUnixTimeStampLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFUnixTimeStamp(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFUnixTimeStampLong(batch);
+ verifyUDFUnixTimeStamp(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFUnixTimeStampLong() {
+ testVectorUDFUnixTimeStamp(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFUnixTimeStampString() {
+ testVectorUDFUnixTimeStamp(TestType.STRING_LONG);
    }

    private void compareToUDFWeekOfYearLong(long t, int y) {
@@ -570,20 +772,20 @@ public class TestVectorTimestampExpressi
      Assert.assertEquals(res.get(), y);
    }

- private void verifyUDFWeekOfYearLong(VectorizedRowBatch batch) {
- /* col[1] = UDFWeekOfYear(col[0]) */
- VectorUDFWeekOfYearLong udf = new VectorUDFWeekOfYearLong(0, 1);
+ private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) {
+ VectorExpression udf;
+ if (testType == TestType.LONG2) {
+ udf = new VectorUDFWeekOfYearLong(0, 1);
+ } else {
+ udf = new VectorUDFWeekOfYearString(0, 1);
+ }
      udf.evaluate(batch);
      final int in = 0;
      final int out = 1;
- Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);

      for (int i = 0; i < batch.size; i++) {
        if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
- if (!batch.cols[in].noNulls) {
- Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
- }
- long t = ((LongColumnVector) batch.cols[in]).vector[i];
+ long t = readVectorElementAt(batch.cols[in], i);
          long y = ((LongColumnVector) batch.cols[out]).vector[i];
          compareToUDFWeekOfYearLong(t, (int) y);
        } else {
@@ -592,37 +794,46 @@ public class TestVectorTimestampExpressi
      }
    }

- @Test
- public void testVectorUDFWeekOfYearLong() {
- VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
- VectorizedRowBatch.DEFAULT_SIZE);
+ private void testVectorUDFWeekOfYear(TestType testType) {
+ VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+ VectorizedRowBatch.DEFAULT_SIZE, testType);
      Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
      Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);

      long[] boundaries = getAllBoundaries();
- batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
- verifyUDFWeekOfYearLong(batch);
+ batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+ verifyUDFWeekOfYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);

- batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+ batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
      batch.cols[0].isRepeating = true;
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);
      batch.cols[0].noNulls = false;
      batch.cols[0].isNull[0] = true;
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);

- batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
- verifyUDFWeekOfYearLong(batch);
+ batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+ verifyUDFWeekOfYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);
      TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
- verifyUDFWeekOfYearLong(batch);
+ verifyUDFWeekOfYear(batch, testType);
+ }
+
+ @Test
+ public void testVectorUDFWeekOfYearLong() {
+ testVectorUDFWeekOfYear(TestType.LONG2);
+ }
+
+ @Test
+ public void testVectorUDFWeekOfYearString() {
+ testVectorUDFWeekOfYear(TestType.STRING_LONG);
    }

    public static void main(String[] args) {
@@ -633,6 +844,12 @@ public class TestVectorTimestampExpressi
      self.testVectorUDFHourLong();
      self.testVectorUDFWeekOfYearLong();
      self.testVectorUDFUnixTimeStampLong();
+ self.testVectorUDFYearString();
+ self.testVectorUDFMonthString();
+ self.testVectorUDFDayOfMonthString();
+ self.testVectorUDFHourString();
+ self.testVectorUDFWeekOfYearString();
+ self.testVectorUDFUnixTimeStampString();
     }
  }
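
For readers skimming the test diff above: each type-specific test (for example testVectorUDFHourLong, now joined by testVectorUDFHourString) delegates to a single private method parameterized by a TestType, so the same null-handling, boundary, and isRepeating assertions exercise both the long-column expressions and the new string-column expressions. The readVectorElementAt helper used in the new hunks is defined earlier in the file and is not shown in this message; what follows is only a hedged re-creation of what such a helper could look like, assuming LONG2 batches store timestamps as epoch nanoseconds in a LongColumnVector and STRING_LONG batches store the same instants as formatted strings in a BytesColumnVector. The class name and the exact conversion are assumptions made for illustration, not the committed code.

import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Hypothetical sketch of a readVectorElementAt-style helper; not the committed code.
public final class ReadVectorElementSketch {

  private ReadVectorElementSketch() {
  }

  /** Returns the timestamp at row i as epoch nanoseconds, regardless of column type. */
  static long readVectorElementAt(ColumnVector col, int i) {
    if (col instanceof LongColumnVector) {
      // Long path: the vector is assumed to already hold epoch nanoseconds.
      return ((LongColumnVector) col).vector[i];
    }
    // String path: decode the byte slice for row i and parse it back into a Timestamp.
    BytesColumnVector bcv = (BytesColumnVector) col;
    String s = new String(bcv.vector[i], bcv.start[i], bcv.length[i], StandardCharsets.UTF_8);
    Timestamp ts = Timestamp.valueOf(s);
    // Whole seconds (floor division handles pre-1970 instants) plus the sub-second nanos.
    return Math.floorDiv(ts.getTime(), 1000L) * 1_000_000_000L + ts.getNanos();
  }
}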


Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q?rev=1547185&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q Mon Dec 2 20:48:28 2013
@@ -0,0 +1,116 @@
+SET hive.vectorized.execution.enabled = true;
+
+-- Test timestamp functions in vectorized mode to verify they run correctly end-to-end.
+
+CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC;
+
+INSERT OVERWRITE TABLE alltypesorc_string
+SELECT
+ to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'),
+ CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING)
+FROM alltypesorc
+LIMIT 40;
+
+CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC;
+
+INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1;
+INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1;
+INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1;
+
+EXPLAIN SELECT
+ to_unix_timestamp(ctimestamp1),
+ year(ctimestamp1),
+ month(ctimestamp1),
+ day(ctimestamp1),
+ dayofmonth(ctimestamp1),
+ weekofyear(ctimestamp1),
+ hour(ctimestamp1),
+ minute(ctimestamp1),
+ second(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ to_unix_timestamp(ctimestamp1),
+ year(ctimestamp1),
+ month(ctimestamp1),
+ day(ctimestamp1),
+ dayofmonth(ctimestamp1),
+ weekofyear(ctimestamp1),
+ hour(ctimestamp1),
+ minute(ctimestamp1),
+ second(ctimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+ to_unix_timestamp(stimestamp1),
+ year(stimestamp1),
+ month(stimestamp1),
+ day(stimestamp1),
+ dayofmonth(stimestamp1),
+ weekofyear(stimestamp1),
+ hour(stimestamp1),
+ minute(stimestamp1),
+ second(stimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+ to_unix_timestamp(stimestamp1),
+ year(stimestamp1),
+ month(stimestamp1),
+ day(stimestamp1),
+ dayofmonth(stimestamp1),
+ weekofyear(stimestamp1),
+ hour(stimestamp1),
+ minute(stimestamp1),
+ second(stimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+ to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1),
+ year(ctimestamp1) = year(stimestamp1),
+ month(ctimestamp1) = month(stimestamp1),
+ day(ctimestamp1) = day(stimestamp1),
+ dayofmonth(ctimestamp1) = dayofmonth(stimestamp1),
+ weekofyear(ctimestamp1) = weekofyear(stimestamp1),
+ hour(ctimestamp1) = hour(stimestamp1),
+ minute(ctimestamp1) = minute(stimestamp1),
+ second(ctimestamp1) = second(stimestamp1)
+FROM alltypesorc_string;
+
+-- Should all be true or NULL
+SELECT
+ to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1),
+ year(ctimestamp1) = year(stimestamp1),
+ month(ctimestamp1) = month(stimestamp1),
+ day(ctimestamp1) = day(stimestamp1),
+ dayofmonth(ctimestamp1) = dayofmonth(stimestamp1),
+ weekofyear(ctimestamp1) = weekofyear(stimestamp1),
+ hour(ctimestamp1) = hour(stimestamp1),
+ minute(ctimestamp1) = minute(stimestamp1),
+ second(ctimestamp1) = second(stimestamp1)
+FROM alltypesorc_string;
+
+-- Wrong format. Should all be NULL.
+EXPLAIN SELECT
+ to_unix_timestamp(stimestamp1),
+ year(stimestamp1),
+ month(stimestamp1),
+ day(stimestamp1),
+ dayofmonth(stimestamp1),
+ weekofyear(stimestamp1),
+ hour(stimestamp1),
+ minute(stimestamp1),
+ second(stimestamp1)
+FROM alltypesorc_wrong;
+
+SELECT
+ to_unix_timestamp(stimestamp1),
+ year(stimestamp1),
+ month(stimestamp1),
+ day(stimestamp1),
+ dayofmonth(stimestamp1),
+ weekofyear(stimestamp1),
+ hour(stimestamp1),
+ minute(stimestamp1),
+ second(stimestamp1)
+FROM alltypesorc_wrong;
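
The last EXPLAIN/SELECT pair above runs the string-based vectorized expressions over alltypesorc_wrong, whose three rows hold deliberately malformed strings, and the in-file comment expects every field to come back NULL. The committed VectorUDFTimestampFieldString is not reproduced in this message; purely as a hedged illustration of how a string-input expression can yield NULL for unparseable rows, the sketch below parses each value defensively and marks the output slot null on failure. The class and method names are invented for the example, and the real parsing logic may differ.

import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.Calendar;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Illustrative only; not the committed VectorUDFTimestampFieldString.
public final class StringTimestampFieldSketch {

  private StringTimestampFieldSketch() {
  }

  /**
   * Writes calendar field 'field' (e.g. Calendar.HOUR_OF_DAY) of each string timestamp
   * in 'in' to 'out' for the first 'size' rows, producing NULL where parsing fails.
   */
  static void evaluateField(BytesColumnVector in, LongColumnVector out, int field, int size) {
    Calendar calendar = Calendar.getInstance();
    for (int i = 0; i < size; i++) {
      if (!in.noNulls && in.isNull[i]) {
        // Propagate input NULLs unchanged.
        out.noNulls = false;
        out.isNull[i] = true;
        continue;
      }
      String s = new String(in.vector[i], in.start[i], in.length[i], StandardCharsets.UTF_8);
      try {
        // Timestamp.valueOf rejects strings that do not match "yyyy-mm-dd hh:mm:ss[.f...]".
        calendar.setTime(Timestamp.valueOf(s));
        out.vector[i] = calendar.get(field);
        out.isNull[i] = false;
      } catch (IllegalArgumentException e) {
        // Unparseable values such as 'abcd' become NULL in the output column.
        out.noNulls = false;
        out.isNull[i] = true;
      }
    }
  }
}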
