FAQ
Author: heyongqiang
Date: Tue Jul 26 03:22:09 2011
New Revision: 1150978

URL: http://svn.apache.org/viewvc?rev=1150978&view=rev
Log:
HIVE-956: add support of columnar binary serde (Krishna Kumar via He Yongqiang)

Added:
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java
hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/
hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ColumnarStructObjectInspector.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Tue Jul 26 03:22:09 2011
@@ -173,7 +173,9 @@ public abstract class BaseSemanticAnalyz
case HiveParser.TOK_TBLRCFILE:
inputFormat = RCFILE_INPUT;
outputFormat = RCFILE_OUTPUT;
- shared.serde = COLUMNAR_SERDE;
+ if (shared.serde == null) {
+ shared.serde = COLUMNAR_SERDE;
+ }
storageFormat = true;
break;
case HiveParser.TOK_TABLEFILEFORMAT:

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java Tue Jul 26 03:22:09 2011
@@ -51,15 +51,7 @@ import org.apache.hadoop.io.Writable;
* (2) ColumnarSerDe initialize ColumnarStruct's field directly. But under the
* field level, it works like LazySimpleSerDe<br>
*/
-public class ColumnarSerDe implements SerDe {
-
- // We need some initial values in case user don't call initialize()
- private ObjectInspector cachedObjectInspector;
-
- private long serializedSize;
- private SerDeStats stats;
- private boolean lastOperationSerialize;
- private boolean lastOperationDeserialize;
+public class ColumnarSerDe extends ColumnarSerDeBase {

@Override
public String toString() {
@@ -104,65 +96,15 @@ public class ColumnarSerDe implements Se
serdeParams.getNullSequence());

int size = serdeParams.getColumnTypes().size();
- field = new BytesRefWritable[size];
- for (int i = 0; i < size; i++) {
- field[i] = new BytesRefWritable();
- serializeCache.set(i, field[i]);
- }
-
+ super.initialize(size);
LOG.debug("ColumnarSerDe initialized with: columnNames="
+ serdeParams.getColumnNames() + " columnTypes="
+ serdeParams.getColumnTypes() + " separator="
+ Arrays.asList(serdeParams.getSeparators()) + " nullstring="
+ serdeParams.getNullString());
-
- serializedSize = 0;
- stats = new SerDeStats();
- lastOperationSerialize = false;
- lastOperationDeserialize = false;
- }
-
- // The object for storing row data
- ColumnarStruct cachedLazyStruct;
-
- /**
- * Deserialize a row from the Writable to a LazyObject.
- */
- public Object deserialize(Writable blob) throws SerDeException {
-
- if (!(blob instanceof BytesRefArrayWritable)) {
- throw new SerDeException(getClass().toString()
- + ": expects BytesRefArrayWritable!");
- }
-
- BytesRefArrayWritable cols = (BytesRefArrayWritable) blob;
- cachedLazyStruct.init(cols);
- lastOperationSerialize = false;
- lastOperationDeserialize = true;
- return cachedLazyStruct;
- }
-
- /**
- * Returns the ObjectInspector for the row.
- */
- public ObjectInspector getObjectInspector() throws SerDeException {
- return cachedObjectInspector;
}

/**
- * Returns the Writable Class after serialization.
- *
- * @see SerDe#getSerializedClass()
- */
- public Class<? extends Writable> getSerializedClass() {
- return BytesRefArrayWritable.class;
- }
-
- BytesRefArrayWritable serializeCache = new BytesRefArrayWritable();
- BytesRefWritable field[];
- ByteStream.Output serializeStream = new ByteStream.Output();
-
- /**
* Serialize a row of data.
*
* @param obj
@@ -244,20 +186,4 @@ public class ColumnarSerDe implements Se
return serializeCache;
}

- /**
- * Returns the statistics after (de)serialization)
- */
-
- public SerDeStats getSerDeStats() {
- // must be different
- assert (lastOperationSerialize != lastOperationDeserialize);
-
- if (lastOperationSerialize) {
- stats.setRawDataSize(serializedSize);
- } else {
- stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize());
- }
- return stats;
-
- }
}

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDeBase.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.columnar;
+
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+public abstract class ColumnarSerDeBase implements SerDe {
+
+ // The object for storing row data
+ ColumnarStructBase cachedLazyStruct;
+ // We need some initial values in case the user doesn't call initialize()
+ protected ObjectInspector cachedObjectInspector;
+
+ protected long serializedSize;
+ protected SerDeStats stats;
+ protected boolean lastOperationSerialize;
+ protected boolean lastOperationDeserialize;
+
+ BytesRefArrayWritable serializeCache = new BytesRefArrayWritable();
+ BytesRefWritable field[];
+ ByteStream.Output serializeStream = new ByteStream.Output();
+
+ @Override
+ public Object deserialize(Writable blob) throws SerDeException {
+ if (!(blob instanceof BytesRefArrayWritable)) {
+ throw new SerDeException(getClass().toString()
+ + ": expects BytesRefArrayWritable!");
+ }
+
+ BytesRefArrayWritable cols = (BytesRefArrayWritable) blob;
+ cachedLazyStruct.init(cols);
+ lastOperationSerialize = false;
+ lastOperationDeserialize = true;
+ return cachedLazyStruct;
+ }
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ // must be different
+ assert (lastOperationSerialize != lastOperationDeserialize);
+
+ if (lastOperationSerialize) {
+ stats.setRawDataSize(serializedSize);
+ } else {
+ stats.setRawDataSize(cachedLazyStruct.getRawDataSerializedSize());
+ }
+ return stats;
+ }
+
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return BytesRefArrayWritable.class;
+ }
+
+ protected void initialize(int size) throws SerDeException {
+ field = new BytesRefWritable[size];
+ for (int i = 0; i < size; i++) {
+ field[i] = new BytesRefWritable();
+ serializeCache.set(i, field[i]);
+ }
+
+ serializedSize = 0;
+ stats = new SerDeStats();
+ lastOperationSerialize = false;
+ lastOperationDeserialize = false;
+ }
+
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java Tue Jul 26 03:22:09 2011
@@ -18,20 +18,15 @@

package org.apache.hadoop.hive.serde2.columnar;

-import java.io.IOException;
import java.util.ArrayList;
-import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.SerDeStatsStruct;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
-import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

/**
@@ -41,12 +36,10 @@ import org.apache.hadoop.io.Text;
* lazy way.
*
*/
-public class ColumnarStruct implements SerDeStatsStruct{
+public class ColumnarStruct extends ColumnarStructBase {

private static final Log LOG = LogFactory.getLog(ColumnarStruct.class);

- int[] prjColIDs = null; // list of projected column IDs
-
Text nullSequence;
int lengthNullSequence;

@@ -72,207 +65,28 @@ public class ColumnarStruct implements S
*/
public ColumnarStruct(ObjectInspector oi,
ArrayList<Integer> notSkippedColumnIDs, Text nullSequence) {
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
- .getAllStructFieldRefs();
- int num = fieldRefs.size();
-
- fieldInfoList = new FieldInfo[num];
-
+ super(oi, notSkippedColumnIDs);
if (nullSequence != null) {
this.nullSequence = nullSequence;
this.lengthNullSequence = nullSequence.getLength();
}
-
- // if no columns is set to be skipped, add all columns in
- // 'notSkippedColumnIDs'
- if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
- for (int i = 0; i < num; i++) {
- notSkippedColumnIDs.add(i);
- }
- }
-
- for (int i = 0; i < num; i++) {
- fieldInfoList[i] = new FieldInfo(
- LazyFactory.createLazyObject(fieldRefs.get(i)
- .getFieldObjectInspector()),
- !notSkippedColumnIDs.contains(i));
- }
-
- // maintain a list of non-NULL column IDs
- int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
- .size();
- prjColIDs = new int[min];
- for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
- int readCol = notSkippedColumnIDs.get(i).intValue();
- if (readCol < num) {
- prjColIDs[index] = readCol;
- index++;
- }
- }
- }
-
- /**
- * Get one field out of the struct.
- *
- * If the field is a primitive field, return the actual object. Otherwise
- * return the LazyObject. This is because PrimitiveObjectInspector does not
- * have control over the object used by the user - the user simply directly
- * use the Object instead of going through Object
- * PrimitiveObjectInspector.get(Object).
- *
- * NOTE: separator and nullSequence has to be the same each time this method
- * is called. These two parameters are used only once to parse each record.
- *
- * @param fieldID
- * The field ID
- * @param nullSequence
- * The sequence for null value
- * @return The field as a LazyObject
- */
- public Object getField(int fieldID) {
- return fieldInfoList[fieldID].uncheckedGetField();
- }
-
- class FieldInfo {
- LazyObject field;
- /*
- * use an array instead of only one object in case in future hive does not do
- * the byte copy.
- */
- ByteArrayRef cachedByteArrayRef;
- BytesRefWritable rawBytesField;
- boolean inited;
- boolean fieldSkipped;
-
- public FieldInfo(LazyObject lazyObject, boolean fieldSkipped) {
- field = lazyObject;
- cachedByteArrayRef = new ByteArrayRef();
- if (fieldSkipped) {
- this.fieldSkipped = true;
- inited = true;
- } else {
- inited = false;
- }
- }
-
- /*
- * ============================ [PERF] ===================================
- * This function is called for every row. Setting up the selected/projected
- * columns at the first call, and don't do that for the following calls.
- * Ideally this should be done in the constructor where we don't need to
- * branch in the function for each row.
- * =========================================================================
- */
- public void init(BytesRefWritable col) {
- if (col != null) {
- rawBytesField= col;
- inited = false;
- } else {
- // select columns that actually do not exist in the file.
- fieldSkipped = true;
- }
- }
-
- /**
- * Return the uncompressed size of this field
- */
- public long getSerializedSize(){
- if (rawBytesField == null) {
- return 0;
- }
- return rawBytesField.getLength();
- }
-
- /**
- * Get the field out of the row without checking parsed. This is called by
- * both getField and getFieldsAsList.
- *
- * @param fieldID
- * The id of the field starting from 0.
- * @param nullSequence
- * The sequence representing NULL value.
- * @return The value of the field
- */
- protected Object uncheckedGetField() {
- if (fieldSkipped) {
- return null;
- }
- if (!inited) {
- try {
- cachedByteArrayRef.setData(rawBytesField.getData());
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- field.init(cachedByteArrayRef, rawBytesField
- .getStart(), rawBytesField.getLength());
- inited = true;
- }
-
-
- int fieldLen = rawBytesField.length;
- if (fieldLen == lengthNullSequence) {
- byte[] data = cachedByteArrayRef.getData();
-
- if (LazyUtils.compare(data, rawBytesField.getStart(), fieldLen,
- nullSequence.getBytes(), 0, lengthNullSequence) == 0) {
- return null;
- }
- }
-
- return field.getObject();
-
- }
}

- FieldInfo[] fieldInfoList = null;
-
-
- /*
- * ============================ [PERF] ===================================
- * This function is called for every row. Setting up the selected/projected
- * columns at the first call, and don't do that for the following calls.
- * Ideally this should be done in the constructor where we don't need to
- * branch in the function for each row.
- * =========================================================================
- */
- public void init(BytesRefArrayWritable cols) {
- for (int i = 0; i < prjColIDs.length; ++i) {
- int fieldIndex = prjColIDs[i];
- if (fieldIndex < cols.size()) {
- fieldInfoList[fieldIndex].init(cols.unCheckedGet(fieldIndex));
- } else {
- // select columns that actually do not exist in the file.
- fieldInfoList[fieldIndex].init(null);
- }
- }
- }
-
- ArrayList<Object> cachedList;
-
- /**
- * Get the values of the fields as an ArrayList.
- *
- * @param nullSequence
- * The sequence for the NULL value
- * @return The values of the fields as an ArrayList.
- */
- public ArrayList<Object> getFieldsAsList() {
- if (cachedList == null) {
- cachedList = new ArrayList<Object>();
- } else {
- cachedList.clear();
- }
- for (int i = 0; i < fieldInfoList.length; i++) {
- cachedList.add(fieldInfoList[i].uncheckedGetField());
- }
- return cachedList;
- }
-
- public long getRawDataSerializedSize() {
- long serializedSize = 0;
- for (int i = 0; i < fieldInfoList.length; ++i) {
- serializedSize += fieldInfoList[i].getSerializedSize();
- }
- return serializedSize;
+ @Override
+ protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef,
+ int start, int fieldLen) {
+ if (fieldLen == lengthNullSequence) {
+ byte[] data = cachedByteArrayRef.getData();
+ if (LazyUtils.compare(data, start, fieldLen,
+ nullSequence.getBytes(), 0, lengthNullSequence) == 0) {
+ return -1;
+ }
+ }
+ return fieldLen;
+ }
+
+ @Override
+ protected LazyObjectBase createLazyObjectBase(ObjectInspector objectInspector) {
+ return LazyFactory.createLazyObject(objectInspector);
}
}

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.columnar;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.SerDeStatsStruct;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+public abstract class ColumnarStructBase implements SerDeStatsStruct {
+
+ class FieldInfo {
+ LazyObjectBase field;
+ /*
+ * use an array instead of only one object in case in future hive does not do
+ * the byte copy.
+ */
+ ByteArrayRef cachedByteArrayRef;
+ BytesRefWritable rawBytesField;
+ boolean inited;
+ boolean fieldSkipped;
+ ObjectInspector objectInspector;
+
+ public FieldInfo(LazyObjectBase lazyObject, boolean fieldSkipped, ObjectInspector oi) {
+ field = lazyObject;
+ cachedByteArrayRef = new ByteArrayRef();
+ objectInspector = oi;
+ if (fieldSkipped) {
+ this.fieldSkipped = true;
+ inited = true;
+ } else {
+ inited = false;
+ }
+ }
+
+ /*
+ * ============================ [PERF] ===================================
+ * This function is called for every row. Setting up the selected/projected
+ * columns at the first call, and don't do that for the following calls.
+ * Ideally this should be done in the constructor where we don't need to
+ * branch in the function for each row.
+ * =========================================================================
+ */
+ public void init(BytesRefWritable col) {
+ if (col != null) {
+ rawBytesField = col;
+ inited = false;
+ } else {
+ // select columns that actually do not exist in the file.
+ fieldSkipped = true;
+ }
+ }
+
+ /**
+ * Return the uncompressed size of this field
+ */
+ public long getSerializedSize() {
+ if (rawBytesField == null) {
+ return 0;
+ }
+ return rawBytesField.getLength();
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ *
+ * @return The value of the field
+ */
+ protected Object uncheckedGetField() {
+ if (fieldSkipped) {
+ return null;
+ }
+ if (!inited) {
+ try {
+ cachedByteArrayRef.setData(rawBytesField.getData());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ inited = true;
+ int byteLength = getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(),
+ rawBytesField.getLength());
+ if (byteLength == -1) {
+ return null;
+ }
+
+ field.init(cachedByteArrayRef, rawBytesField.getStart(), byteLength);
+ return field.getObject();
+ } else {
+ if (getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(),
+ rawBytesField.getLength()) == -1) {
+ return null;
+ }
+ return field.getObject();
+ }
+ }
+ }
+
+ protected int[] prjColIDs = null;
+ private FieldInfo[] fieldInfoList = null;
+ private ArrayList<Object> cachedList;
+
+ public ColumnarStructBase(ObjectInspector oi,
+ ArrayList<Integer> notSkippedColumnIDs) {
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
+ .getAllStructFieldRefs();
+ int num = fieldRefs.size();
+
+ fieldInfoList = new FieldInfo[num];
+
+ // if no columns are set to be skipped, add all columns to
+ // 'notSkippedColumnIDs'
+ if (notSkippedColumnIDs == null || notSkippedColumnIDs.size() == 0) {
+ for (int i = 0; i < num; i++) {
+ notSkippedColumnIDs.add(i);
+ }
+ }
+
+ for (int i = 0; i < num; i++) {
+ ObjectInspector foi = fieldRefs.get(i).getFieldObjectInspector();
+ fieldInfoList[i] = new FieldInfo(
+ createLazyObjectBase(foi),
+ !notSkippedColumnIDs.contains(i),
+ foi);
+ }
+
+ // maintain a list of non-NULL column IDs
+ int min = notSkippedColumnIDs.size() > num ? num : notSkippedColumnIDs
+ .size();
+ prjColIDs = new int[min];
+ for (int i = 0, index = 0; i < notSkippedColumnIDs.size(); ++i) {
+ int readCol = notSkippedColumnIDs.get(i).intValue();
+ if (readCol < num) {
+ prjColIDs[index] = readCol;
+ index++;
+ }
+ }
+ }
+
+ /**
+ * Get one field out of the struct.
+ *
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply directly
+ * use the Object instead of going through Object
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * NOTE: separator and nullSequence have to be the same each time this method
+ * is called. These two parameters are used only once to parse each record.
+ *
+ * @param fieldID
+ * The field ID
+ * @return The field value as described above, or null when the field is
+ * skipped or when getLength reports the serialized bytes as a
+ * null value
+ */
+ public Object getField(int fieldID) {
+ return fieldInfoList[fieldID].uncheckedGetField();
+ }
+
+ /**
+ * Check if the object is null and return the length of the stream
+ *
+ * @param objectInspector
+ * @param cachedByteArrayRef
+ * the bytes of the object
+ * @param start
+ * the start offset
+ * @param length
+ * the length
+ *
+ * @return -1 for null, >=0 for length
+ */
+ protected abstract int getLength(ObjectInspector objectInspector,
+ ByteArrayRef cachedByteArrayRef, int start, int length);
+
+ /**
+ * create the lazy object for this field
+ *
+ * @param objectInspector
+ * the object inspector for the field
+ * @return the lazy object for the field
+ */
+ protected abstract LazyObjectBase createLazyObjectBase(ObjectInspector objectInspector);
+
+ public void init(BytesRefArrayWritable cols) {
+ for (int i = 0; i < prjColIDs.length; ++i) {
+ int fieldIndex = prjColIDs[i];
+ if (fieldIndex < cols.size()) {
+ fieldInfoList[fieldIndex].init(cols.unCheckedGet(fieldIndex));
+ } else {
+ // select columns that actually do not exist in the file.
+ fieldInfoList[fieldIndex].init(null);
+ }
+ }
+ }
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * The returned list is cached and reused: every call clears and refills it,
+ * so copy the values if they must outlive the next call.
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fieldInfoList.length; i++) {
+ cachedList.add(fieldInfoList[i].uncheckedGetField());
+ }
+ return cachedList;
+ }
+
+ public long getRawDataSerializedSize() {
+ long serializedSize = 0;
+ for (int i = 0; i < fieldInfoList.length; ++i) {
+ serializedSize += fieldInfoList[i].getSerializedSize();
+ }
+ return serializedSize;
+ }
+
+}

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,100 @@
+package org.apache.hadoop.hive.serde2.columnar;
+
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.Writable;
+
+
+/**
+ * LazyBinaryColumnarSerDe. This serde combines elements of columnar serde and lazybinary serde
+ * to produce a serde which serializes columns into a BytesRefArrayWritable in a compact binary
+ * format and which is deserialized in a lazy, i.e. on-demand fashion.
+ *
+ */
+public class LazyBinaryColumnarSerDe extends ColumnarSerDeBase {
+
+ private List<String> columnNames;
+ private List<TypeInfo> columnTypes;
+
+ @Override
+ public String toString() {
+ return getClass().toString()
+ + "["
+ + columnNames
+ + ":"
+ + columnTypes + "]";
+ }
+
+ @Override
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+ SerDeParameters serdeParams = new SerDeParameters();
+ LazyUtils.extractColumnInfo(tbl, serdeParams, getClass().getName());
+ columnNames = serdeParams.getColumnNames();
+ columnTypes = serdeParams.getColumnTypes();
+
+ cachedObjectInspector = LazyBinaryFactory.createColumnarStructInspector(
+ columnNames, columnTypes);
+ java.util.ArrayList<Integer> notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
+ cachedLazyStruct = new LazyBinaryColumnarStruct(cachedObjectInspector, notSkipIDs);
+ int size = columnTypes.size();
+ super.initialize(size);
+ }
+
+ static final byte[] INVALID_UTF__SINGLE_BYTE = {(byte)Integer.parseInt("10111111", 2)};
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ if (objInspector.getCategory() != Category.STRUCT) {
+ throw new SerDeException(getClass().toString()
+ + " can only serialize struct types, but we got: "
+ + objInspector.getTypeName());
+ }
+
+ StructObjectInspector soi = (StructObjectInspector) objInspector;
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ List<Object> list = soi.getStructFieldsDataAsList(obj);
+
+ boolean warnedOnceNullMapKey = false;
+ serializeStream.reset();
+ serializedSize = 0;
+ int streamOffset = 0;
+ // Serialize each field
+ for (int i = 0; i < fields.size(); i++) {
+ // Get the field objectInspector and the field object.
+ ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+ Object f = (list == null ? null : list.get(i));
+ // Empty strings are marked by a single-byte sequence that is invalid UTF-8; a valid UTF-8
+ // stream cannot produce this sequence.
+ if ((f != null) && (foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE))
+ && ((PrimitiveObjectInspector) foi).getPrimitiveCategory().equals(
+ PrimitiveObjectInspector.PrimitiveCategory.STRING)
+ && ((StringObjectInspector) foi).getPrimitiveJavaObject(f).length() == 0) {
+ serializeStream.write(INVALID_UTF__SINGLE_BYTE, 0, 1);
+ } else {
+ LazyBinarySerDe.serialize(serializeStream, f, foi, true, warnedOnceNullMapKey);
+ }
+ field[i].set(serializeStream.getData(), streamOffset, serializeStream
+ .getCount()
+ - streamOffset);
+ streamOffset = serializeStream.getCount();
+ }
+ serializedSize = serializeStream.getCount();
+ lastOperationSerialize = true;
+ lastOperationDeserialize = false;
+ return serializeCache;
+ }
+}

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.columnar;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+ /** ColumnarStructBase variant whose field objects are created through LazyBinaryFactory. */
+ public class LazyBinaryColumnarStruct extends ColumnarStructBase {
+   static VInt vInt = new LazyBinaryUtils.VInt();
+
+   public LazyBinaryColumnarStruct(ObjectInspector oi, ArrayList<Integer> notSkippedColumnIDs) {
+     super(oi, notSkippedColumnIDs);
+   }
+
+   /**
+    * Returns -1 for a zero-byte value, 0 for a string column holding only the single-byte
+    * empty-string marker, and the unchanged byte length in every other case.
+    */
+   @Override
+   protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef,
+       int start, int length) {
+     if (length == 0) {
+       return -1;
+     }
+     if (!objectInspector.getCategory().equals(Category.PRIMITIVE)) {
+       return length;
+     }
+     PrimitiveCategory kind = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
+     boolean emptyStringMarker = kind.equals(PrimitiveCategory.STRING) && length == 1
+         && cachedByteArrayRef.getData()[start] == LazyBinaryColumnarSerDe.INVALID_UTF__SINGLE_BYTE[0];
+     return emptyStringMarker ? 0 : length;
+   }
+
+   @Override
+   protected LazyObjectBase createLazyObjectBase(ObjectInspector objectInspector) {
+     return LazyBinaryFactory.createLazyBinaryObject(objectInspector);
+   }
+}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java Tue Jul 26 03:22:09 2011
@@ -214,7 +214,7 @@ public final class LazyFactory {
separators, 1, nullSequence, escaped, escapeChar));
}
return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames,
- columnObjectInspectors, nullSequence);
+ columnObjectInspectors);
}

private LazyFactory() {

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java Tue Jul 26 03:22:09 2011
@@ -25,7 +25,7 @@ import org.apache.hadoop.hive.serde2.obj
* A LazyObject can represent any primitive object or hierarchical object like
* array, map or struct.
*/
-public abstract class LazyObject<OI extends ObjectInspector> {
+public abstract class LazyObject<OI extends ObjectInspector> extends LazyObjectBase {

OI oi;

@@ -40,27 +40,6 @@ public abstract class LazyObject<OI exte
this.oi = oi;
}

- /**
- * Set the data for this LazyObject. We take ByteArrayRef instead of byte[] so
- * that we will be able to drop the reference to byte[] by a single
- * assignment. The ByteArrayRef object can be reused across multiple rows.
- *
- * @param bytes
- * The wrapper of the byte[].
- * @param start
- * The start position inside the bytes.
- * @param length
- * The length of the data, starting from "start"
- * @see ByteArrayRef
- */
- public abstract void init(ByteArrayRef bytes, int start, int length);
-
- /**
- * If the LazyObject is a primitive Object, then deserialize it and return the
- * actual primitive Object. Otherwise (array, map, struct), return this.
- */
- public abstract Object getObject();
-
@Override
public abstract int hashCode();


Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazy;
+
+ public abstract class LazyObjectBase {
+
+ /**
+ * Sets the data for this LazyObjectBase. A ByteArrayRef is taken instead of a
+ * raw byte[] so that the reference to the byte[] can be dropped by a single
+ * assignment. The ByteArrayRef object can be reused across multiple rows.
+ *
+ * @param bytes
+ * the wrapper of the byte[]
+ * @param start
+ * the start position inside the bytes
+ * @param length
+ * the length of the data, starting from "start"
+ * @see ByteArrayRef
+ */
+ public abstract void init(ByteArrayRef bytes, int start, int length);
+
+ /**
+ * Returns the deserialized primitive value if this LazyObjectBase represents a
+ * primitive; otherwise (array, map, struct) returns this object itself.
+ */
+ public abstract Object getObject();
+
+}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java Tue Jul 26 03:22:09 2011
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.serde2.lazy;

import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -232,39 +231,7 @@ public class LazySimpleSerDe implements
serdeParams.lastColumnTakesRest = (lastColumnTakesRestString != null && lastColumnTakesRestString
.equalsIgnoreCase("true"));

- // Read the configuration parameters
- String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
- // NOTE: if "columns.types" is missing, all columns will be of String type
- String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
-
- // Parse the configuration parameters
-
- if (columnNameProperty != null && columnNameProperty.length() > 0) {
- serdeParams.columnNames = Arrays.asList(columnNameProperty.split(","));
- } else {
- serdeParams.columnNames = new ArrayList<String>();
- }
- if (columnTypeProperty == null) {
- // Default type: all string
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < serdeParams.columnNames.size(); i++) {
- if (i > 0) {
- sb.append(":");
- }
- sb.append(Constants.STRING_TYPE_NAME);
- }
- columnTypeProperty = sb.toString();
- }
-
- serdeParams.columnTypes = TypeInfoUtils
- .getTypeInfosFromTypeString(columnTypeProperty);
-
- if (serdeParams.columnNames.size() != serdeParams.columnTypes.size()) {
- throw new SerDeException(serdeName + ": columns has "
- + serdeParams.columnNames.size()
- + " elements while columns.types has "
- + serdeParams.columnTypes.size() + " elements!");
- }
+ LazyUtils.extractColumnInfo(tbl, serdeParams, serdeName);

// Create the LazyObject for storing the rows
serdeParams.rowTypeInfo = TypeInfoFactory.getStructTypeInfo(

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Tue Jul 26 03:22:09 2011
@@ -21,7 +21,13 @@ import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
-
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Properties;
+
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -31,6 +37,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

/**
@@ -222,6 +229,43 @@ public final class LazyUtils {
return hash;
}

+   /**
+    * Populates the column names and column types of serdeParams from the table properties.
+    * If the column types property is absent, every column defaults to the string type.
+    *
+    * @param tbl table properties to read the column names and types from
+    * @param serdeParams parameter holder whose columnNames and columnTypes are assigned
+    * @param serdeName name used as the prefix of the error message
+    * @throws SerDeException if the numbers of column names and column types differ
+    */
+   public static void extractColumnInfo(Properties tbl, SerDeParameters serdeParams,
+       String serdeName) throws SerDeException {
+     String names = tbl.getProperty(Constants.LIST_COLUMNS);
+     String types = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+     boolean hasNames = names != null && names.length() > 0;
+     serdeParams.columnNames = hasNames ? Arrays.asList(names.split(","))
+         : new ArrayList<String>();
+
+     if (types == null) {
+       // No explicit types configured: one string type per column, joined with ':'.
+       StringBuilder allStrings = new StringBuilder();
+       String separator = "";
+       for (String ignored : serdeParams.columnNames) {
+         allStrings.append(separator).append(Constants.STRING_TYPE_NAME);
+         separator = ":";
+       }
+       types = allStrings.toString();
+     }
+     serdeParams.columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(types);
+
+     int nameCount = serdeParams.columnNames.size();
+     int typeCount = serdeParams.columnTypes.size();
+     if (nameCount != typeCount) {
+       throw new SerDeException(serdeName + ": columns has " + nameCount
+           + " elements while columns.types has " + typeCount + " elements!");
+     }
+   }
+
private LazyUtils() {
// prevent instantiation
}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Tue Jul 26 03:22:09 2011
@@ -17,10 +17,14 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;

+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
@@ -32,6 +36,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableVoidObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

/**
* LazyBinaryFactory.
@@ -91,4 +96,16 @@ public final class LazyBinaryFactory {
private LazyBinaryFactory() {
// prevent instantiation
}
+
+   /** Builds a columnar struct inspector with one lazy-binary inspector per column type. */
+   public static ObjectInspector createColumnarStructInspector(List<String> columnNames,
+       List<TypeInfo> columnTypes) {
+     ArrayList<ObjectInspector> fieldInspectors =
+         new ArrayList<ObjectInspector>(columnTypes.size());
+     for (TypeInfo columnType : columnTypes) {
+       fieldInspectors.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(columnType));
+     }
+     return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames,
+         fieldInspectors);
+   }
}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java Tue Jul 26 03:22:09 2011
@@ -17,7 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

/**
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.serde2.obj
* A LazyBinaryObject can represent any primitive object or hierarchical object
* like string, list, map or struct.
*/
-public abstract class LazyBinaryObject<OI extends ObjectInspector> {
+public abstract class LazyBinaryObject<OI extends ObjectInspector> extends LazyObjectBase {

OI oi;

@@ -42,30 +42,6 @@ public abstract class LazyBinaryObject<O
this.oi = oi;
}

- /**
- * Set the data for this LazyBinaryObject. We take ByteArrayRef instead of
- * byte[] so that we will be able to drop the reference to byte[] by a single
- * assignment. The ByteArrayRef object can be reused across multiple rows.
- *
- * Never call this function if the object represent a null!!!
- *
- * @param bytes
- * The wrapper of the byte[].
- * @param start
- * The start position inside the bytes.
- * @param length
- * The length of the data, starting from "start"
- * @see ByteArrayRef
- */
- public abstract void init(ByteArrayRef bytes, int start, int length);
-
- /**
- * If the LazyBinaryObject is a primitive Object, then deserialize it and
- * return the actual primitive Object. Otherwise (string, list, map, struct),
- * return this.
- */
- public abstract Object getObject();
-
@Override
public abstract int hashCode();
}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Jul 26 03:22:09 2011
@@ -179,6 +179,7 @@ public class LazyBinarySerDe implements
*/
BytesWritable serializeBytesWritable = new BytesWritable();
ByteStream.Output serializeByteStream = new ByteStream.Output();
+ boolean nullMapKey = false;

/**
* Serialize an object to a byte buffer in a binary compact way.
@@ -195,8 +196,8 @@ public class LazyBinarySerDe implements

serializeByteStream.reset();
// serialize the row as a struct
- serializeStruct(serializeByteStream, obj,
- (StructObjectInspector) objInspector);
+ nullMapKey = serializeStruct(serializeByteStream, obj,
+ (StructObjectInspector) objInspector, nullMapKey);
// return the serialized bytes
serializeBytesWritable.set(serializeByteStream.getData(), 0,
serializeByteStream.getCount());
@@ -207,8 +208,6 @@ public class LazyBinarySerDe implements
return serializeBytesWritable;
}

- boolean nullMapKey = false;
-
/**
* Serialize a struct object without writing the byte size. This function is
* shared by both row serialization and struct serialization.
@@ -219,12 +218,16 @@ public class LazyBinarySerDe implements
* the struct object to serialize
* @param objInspector
* the struct object inspector
+ * @param warnedOnceNullMapKey a boolean indicating whether a warning
+ * has been issued once already when encountering null map keys
+ * @return a boolean indicating whether a warning for null map keys has been issued
+ * once already
*/
- private void serializeStruct(Output byteStream, Object obj,
- StructObjectInspector soi) {
+ private static boolean serializeStruct(Output byteStream, Object obj,
+ StructObjectInspector soi, boolean warnedOnceNullMapKey) {
// do nothing for null struct
if (null == obj) {
- return;
+ return warnedOnceNullMapKey;
}
/*
* Interleave serializing one null byte and 8 struct fields in each round,
@@ -243,15 +246,16 @@ public class LazyBinarySerDe implements
// if this is the last element and serialize the
// corresponding 8 struct fields at the same time
if (7 == i % 8 || i == size - 1) {
- serializeByteStream.write(nullByte);
+ byteStream.write(nullByte);
for (int j = lasti; j <= i; j++) {
- serialize(serializeByteStream, soi.getStructFieldData(obj, fields
- .get(j)), fields.get(j).getFieldObjectInspector());
+ warnedOnceNullMapKey = serialize(byteStream, soi.getStructFieldData(obj, fields
+ .get(j)), fields.get(j).getFieldObjectInspector(), false, warnedOnceNullMapKey);
}
lasti = i + 1;
nullByte = 0;
}
}
+ return warnedOnceNullMapKey;
}

/**
@@ -264,13 +268,19 @@ public class LazyBinarySerDe implements
* the object to serialize
* @param objInspector
* the object inspector
+ * @param skipLengthPrefix a boolean indicating whether the leading byte-size
+ * prefix should be omitted for the top-level string/list/map/struct
+ * @param warnedOnceNullMapKey a boolean indicating whether a warning
+ * has been issued once already when encountering null map keys
+ * @return a boolean indicating whether a warning for null map keys has been issued
+ * once already
*/
- private void serialize(Output byteStream, Object obj,
- ObjectInspector objInspector) {
+ public static boolean serialize(Output byteStream, Object obj,
+ ObjectInspector objInspector, boolean skipLengthPrefix, boolean warnedOnceNullMapKey) {

// do nothing for null object
if (null == obj) {
- return;
+ return warnedOnceNullMapKey;
}

switch (objInspector.getCategory()) {
@@ -278,37 +288,37 @@ public class LazyBinarySerDe implements
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
switch (poi.getPrimitiveCategory()) {
case VOID: {
- return;
+ return warnedOnceNullMapKey;
}
case BOOLEAN: {
boolean v = ((BooleanObjectInspector) poi).get(obj);
byteStream.write((byte) (v ? 1 : 0));
- return;
+ return warnedOnceNullMapKey;
}
case BYTE: {
ByteObjectInspector boi = (ByteObjectInspector) poi;
byte v = boi.get(obj);
byteStream.write(v);
- return;
+ return warnedOnceNullMapKey;
}
case SHORT: {
ShortObjectInspector spoi = (ShortObjectInspector) poi;
short v = spoi.get(obj);
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
- return;
+ return warnedOnceNullMapKey;
}
case INT: {
IntObjectInspector ioi = (IntObjectInspector) poi;
int v = ioi.get(obj);
LazyBinaryUtils.writeVInt(byteStream, v);
- return;
+ return warnedOnceNullMapKey;
}
case LONG: {
LongObjectInspector loi = (LongObjectInspector) poi;
long v = loi.get(obj);
LazyBinaryUtils.writeVLong(byteStream, v);
- return;
+ return warnedOnceNullMapKey;
}
case FLOAT: {
FloatObjectInspector foi = (FloatObjectInspector) poi;
@@ -317,7 +327,7 @@ public class LazyBinarySerDe implements
byteStream.write((byte) (v >> 16));
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
- return;
+ return warnedOnceNullMapKey;
}
case DOUBLE: {
DoubleObjectInspector doi = (DoubleObjectInspector) poi;
@@ -330,18 +340,20 @@ public class LazyBinarySerDe implements
byteStream.write((byte) (v >> 16));
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
- return;
+ return warnedOnceNullMapKey;
}
case STRING: {
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(obj);
/* write byte size of the string which is a vint */
int length = t.getLength();
- LazyBinaryUtils.writeVInt(byteStream, length);
+ if (!skipLengthPrefix) {
+ LazyBinaryUtils.writeVInt(byteStream, length);
+ }
/* write string itself */
byte[] data = t.getBytes();
byteStream.write(data, 0, length);
- return;
+ return warnedOnceNullMapKey;
}
default: {
throw new RuntimeException("Unrecognized type: "
@@ -353,15 +365,18 @@ public class LazyBinarySerDe implements
ListObjectInspector loi = (ListObjectInspector) objInspector;
ObjectInspector eoi = loi.getListElementObjectInspector();

- // 1/ reserve spaces for the byte size of the list
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int listStart = byteStream.getCount();
-
+ int byteSizeStart = 0;
+ int listStart = 0;
+ if (!skipLengthPrefix) {
+ // 1/ reserve spaces for the byte size of the list
+ // which is a integer and takes four bytes
+ byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ listStart = byteStream.getCount();
+ }
// 2/ write the size of the list as a VInt
int size = loi.getListLength(obj);
LazyBinaryUtils.writeVInt(byteStream, size);
@@ -383,19 +398,21 @@ public class LazyBinarySerDe implements

// 4/ write element by element from the list
for (int eid = 0; eid < size; eid++) {
- serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ warnedOnceNullMapKey = serialize(byteStream, loi.getListElement(obj, eid), eoi,
+ false, warnedOnceNullMapKey);
}

- // 5/ update the list byte size
- int listEnd = byteStream.getCount();
- int listSize = listEnd - listStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (listSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (listSize);
-
- return;
+ if (!skipLengthPrefix) {
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+ }
+ return warnedOnceNullMapKey;
}
case MAP: {
MapObjectInspector moi = (MapObjectInspector) objInspector;
@@ -403,15 +420,19 @@ public class LazyBinarySerDe implements
ObjectInspector voi = moi.getMapValueObjectInspector();
Map<?, ?> map = moi.getMap(obj);

- // 1/ reserve spaces for the byte size of the map
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int mapStart = byteStream.getCount();
-
+ int byteSizeStart = 0;
+ int mapStart = 0;
+ if (!skipLengthPrefix) {
+ // 1/ reserve spaces for the byte size of the map
+ // which is a integer and takes four bytes
+ byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ mapStart = byteStream.getCount();
+ }
+
// 2/ write the size of the map which is a VInt
int size = map.size();
LazyBinaryUtils.writeVInt(byteStream, size);
@@ -423,8 +444,8 @@ public class LazyBinarySerDe implements
// set the bit to 1 if a key is not null
if (null != entry.getKey()) {
nullByte |= 1 << (b % 8);
- } else if (!nullMapKey) {
- nullMapKey = true;
+ } else if (!warnedOnceNullMapKey) {
+ warnedOnceNullMapKey = true;
LOG.warn("Null map key encountered! Ignoring similar problems.");
}
b++;
@@ -443,44 +464,50 @@ public class LazyBinarySerDe implements

// 4/ write key-value pairs one by one
for (Map.Entry<?, ?> entry : map.entrySet()) {
- serialize(byteStream, entry.getKey(), koi);
- serialize(byteStream, entry.getValue(), voi);
+ warnedOnceNullMapKey = serialize(byteStream, entry.getKey(), koi, false, warnedOnceNullMapKey);
+ warnedOnceNullMapKey = serialize(byteStream, entry.getValue(), voi, false, warnedOnceNullMapKey);
}

- // 5/ update the byte size of the map
- int mapEnd = byteStream.getCount();
- int mapSize = mapEnd - mapStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (mapSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (mapSize);
-
- return;
+ if (!skipLengthPrefix) {
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+ }
+ return warnedOnceNullMapKey;
}
case STRUCT: {
- // 1/ reserve spaces for the byte size of the struct
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int structStart = byteStream.getCount();
-
+ int byteSizeStart = 0;
+ int structStart = 0;
+ if (!skipLengthPrefix) {
+ // 1/ reserve spaces for the byte size of the struct
+ // which is a integer and takes four bytes
+ byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ structStart = byteStream.getCount();
+ }
// 2/ serialize the struct
- serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+ warnedOnceNullMapKey = serializeStruct(byteStream, obj, (StructObjectInspector) objInspector,
+ warnedOnceNullMapKey);

- // 3/ update the byte size of the struct
- int structEnd = byteStream.getCount();
- int structSize = structEnd - structStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (structSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (structSize);
-
- return;
+ if (!skipLengthPrefix) {
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+ }
+ return warnedOnceNullMapKey;
}
default: {
throw new RuntimeException("Unrecognized type: "

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ColumnarStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ColumnarStructObjectInspector.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ColumnarStructObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ColumnarStructObjectInspector.java Tue Jul 26 03:22:09 2011
@@ -23,12 +23,11 @@ import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarStruct;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.columnar.ColumnarStructBase;

/**
* ColumnarStructObjectInspector works on struct data that is stored in
- * ColumnarStruct.
+ * ColumnarStructBase.
*
* The names of the struct fields and the internal structure of the struct
* fields are specified in the ctor of the ColumnarStructObjectInspector.
@@ -78,22 +77,18 @@ class ColumnarStructObjectInspector exte
return ObjectInspectorUtils.getStandardStructTypeName(this);
}

- Text nullSequence;
-
/**
* Call ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
*/
public ColumnarStructObjectInspector(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors, Text nullSequence) {
- init(structFieldNames, structFieldObjectInspectors, nullSequence);
+ List<ObjectInspector> structFieldObjectInspectors) {
+ init(structFieldNames, structFieldObjectInspectors);
}

protected void init(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors, Text nullSequence) {
+ List<ObjectInspector> structFieldObjectInspectors) {
assert (structFieldNames.size() == structFieldObjectInspectors.size());

- this.nullSequence = nullSequence;
-
fields = new ArrayList<MyField>(structFieldNames.size());
for (int i = 0; i < structFieldNames.size(); i++) {
fields.add(new MyField(i, structFieldNames.get(i),
@@ -101,14 +96,11 @@ class ColumnarStructObjectInspector exte
}
}

- protected ColumnarStructObjectInspector(List<StructField> fields,
- Text nullSequence) {
- init(fields, nullSequence);
+ protected ColumnarStructObjectInspector(List<StructField> fields) {
+ init(fields);
}

- protected void init(List<StructField> fields, Text nullSequence) {
- this.nullSequence = nullSequence;
-
+ protected void init(List<StructField> fields) {
this.fields = new ArrayList<MyField>(fields.size());
for (int i = 0; i < fields.size(); i++) {
this.fields.add(new MyField(i, fields.get(i).getFieldName(), fields
@@ -138,7 +130,7 @@ class ColumnarStructObjectInspector exte
if (data == null) {
return null;
}
- ColumnarStruct struct = (ColumnarStruct) data;
+ ColumnarStructBase struct = (ColumnarStructBase) data;
MyField f = (MyField) fieldRef;

int fieldID = f.getFieldID();
@@ -152,7 +144,7 @@ class ColumnarStructObjectInspector exte
if (data == null) {
return null;
}
- ColumnarStruct struct = (ColumnarStruct) data;
+ ColumnarStructBase struct = (ColumnarStructBase) data;
return struct.getFieldsAsList();
}
}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=1150978&r1=1150977&r2=1150978&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Tue Jul 26 03:22:09 2011
@@ -30,7 +30,6 @@ import java.util.Map;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.io.Text;

/**
* ObjectInspectorFactory is the primary way to create new ObjectInspector
@@ -273,16 +272,15 @@ public final class ObjectInspectorFactor

public static ColumnarStructObjectInspector getColumnarStructObjectInspector(
List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors, Text nullSequence) {
+ List<ObjectInspector> structFieldObjectInspectors) {
ArrayList<Object> signature = new ArrayList<Object>();
signature.add(structFieldNames);
signature.add(structFieldObjectInspectors);
- signature.add(nullSequence.toString());
ColumnarStructObjectInspector result = cachedColumnarStructObjectInspector
.get(signature);
if (result == null) {
result = new ColumnarStructObjectInspector(structFieldNames,
- structFieldObjectInspectors, nullSequence);
+ structFieldObjectInspectors);
cachedColumnarStructObjectInspector.put(signature, result);
}
return result;

Added: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java?rev=1150978&view=auto
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java (added)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/columnar/TestLazyBinaryColumnarSerDe.java Tue Jul 26 03:22:09 2011
@@ -0,0 +1,206 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.columnar;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.CrossMapEqualComparer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+public class TestLazyBinaryColumnarSerDe extends TestCase { // serialize-then-deserialize round-trip tests for LazyBinaryColumnarSerDe
+ /** Two-field value used as list element, map value and nested struct inside OuterStruct. */
+ private static class InnerStruct {
+ public InnerStruct(Integer i, Long l) {
+ mInt = i;
+ mLong = l;
+ }
+ Integer mInt;
+ Long mLong;
+ }
+ /** Row type under test: boxed numeric fields, a String, a list, a map and a nested struct. */
+ private static class OuterStruct {
+ Byte mByte;
+ Short mShort;
+ Integer mInt;
+ Long mLong;
+ Float mFloat;
+ Double mDouble;
+ String mString;
+ List<InnerStruct> mArray;
+ Map<String, InnerStruct> mMap;
+ InnerStruct mStruct;
+ }
+ /** Round-trips a fully populated OuterStruct; the comparison uses CrossMapEqualComparer. */
+ public void testSerDe() throws SerDeException {
+ StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
+ .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
+ String cols = ObjectInspectorUtils.getFieldNames(oi);
+ Properties props = new Properties();
+ props.setProperty(Constants.LIST_COLUMNS, cols);
+ props.setProperty(Constants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
+ LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
+ serde.initialize(new Configuration(), props);
+ // Build the input object with every field populated (two list elements, two map entries).
+ OuterStruct outerStruct = new OuterStruct();
+ outerStruct.mByte = 1;
+ outerStruct.mShort = 2;
+ outerStruct.mInt = 3;
+ outerStruct.mLong = 4l;
+ outerStruct.mFloat = 5.01f;
+ outerStruct.mDouble = 6.001d;
+ outerStruct.mString = "seven";
+ InnerStruct is1 = new InnerStruct(8, 9l);
+ InnerStruct is2 = new InnerStruct(10, 11l);
+ outerStruct.mArray = new ArrayList<InnerStruct>(2);
+ outerStruct.mArray.add(is1);
+ outerStruct.mArray.add(is2);
+ outerStruct.mMap = new TreeMap<String, InnerStruct>();
+ outerStruct.mMap.put(new String("twelve"), new InnerStruct(13, 14l));
+ outerStruct.mMap.put(new String("fifteen"), new InnerStruct(16, 17l));
+ outerStruct.mStruct = new InnerStruct(18, 19l);
+ BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
+ // Deserialize, then compare original and result through their respective object inspectors.
+ ObjectInspector out_oi = serde.getObjectInspector();
+ Object out_o = serde.deserialize(braw);
+ if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new CrossMapEqualComparer())) {
+ System.out.println("expected = "
+ + SerDeUtils.getJSONString(outerStruct, oi));
+ System.out.println("actual = "
+ + SerDeUtils.getJSONString(out_o, out_oi));
+ fail("Deserialized object does not compare");
+ }
+ }
+ /** Round-trips an OuterStruct whose string, list and map are empty rather than null. */
+ public void testSerDeEmpties() throws SerDeException {
+ StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
+ .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
+ String cols = ObjectInspectorUtils.getFieldNames(oi);
+ Properties props = new Properties();
+ props.setProperty(Constants.LIST_COLUMNS, cols);
+ props.setProperty(Constants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
+ LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
+ serde.initialize(new Configuration(), props);
+ // Scalars and the nested struct are set; the string, list and map are empty, not null.
+ OuterStruct outerStruct = new OuterStruct();
+ outerStruct.mByte = 101;
+ outerStruct.mShort = 2002;
+ outerStruct.mInt = 3003;
+ outerStruct.mLong = 4004l;
+ outerStruct.mFloat = 5005.01f;
+ outerStruct.mDouble = 6006.001d;
+ outerStruct.mString = "";
+ outerStruct.mArray = new ArrayList<InnerStruct>();
+ outerStruct.mMap = new TreeMap<String, InnerStruct>();
+ outerStruct.mStruct = new InnerStruct(180018, 190019l);
+ BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
+
+ ObjectInspector out_oi = serde.getObjectInspector();
+ Object out_o = serde.deserialize(braw);
+ if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
+ System.out.println("expected = "
+ + SerDeUtils.getJSONString(outerStruct, oi));
+ System.out.println("actual = "
+ + SerDeUtils.getJSONString(out_o, out_oi));
+ fail("Deserialized object does not compare");
+ }
+ }
+
+ /** Round-trips an OuterStruct on which no field has been assigned, so every field is null. */
+ public void testSerDeOuterNulls() throws SerDeException {
+ StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
+ .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
+ String cols = ObjectInspectorUtils.getFieldNames(oi);
+ Properties props = new Properties();
+ props.setProperty(Constants.LIST_COLUMNS, cols);
+ props.setProperty(Constants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
+ LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
+ serde.initialize(new Configuration(), props);
+ // Nothing is assigned below: the object is serialized with all fields still null.
+ OuterStruct outerStruct = new OuterStruct();
+ BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
+
+ ObjectInspector out_oi = serde.getObjectInspector();
+ Object out_o = serde.deserialize(braw);
+ if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
+ System.out.println("expected = "
+ + SerDeUtils.getJSONString(outerStruct, oi));
+ System.out.println("actual = "
+ + SerDeUtils.getJSONString(out_o, out_oi));
+ fail("Deserialized object does not compare");
+ }
+ }
+ /** Round-trips nulls nested inside the list elements, the map key and value, and the inner struct. */
+ public void testSerDeInnerNulls() throws SerDeException {
+ StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
+ .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
+ String cols = ObjectInspectorUtils.getFieldNames(oi);
+ Properties props = new Properties();
+ props.setProperty(Constants.LIST_COLUMNS, cols);
+ props.setProperty(Constants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
+ LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
+ serde.initialize(new Configuration(), props);
+ // Top-level fields are all set; nulls appear only inside the nested values.
+ OuterStruct outerStruct = new OuterStruct();
+ outerStruct.mByte = 1;
+ outerStruct.mShort = 2;
+ outerStruct.mInt = 3;
+ outerStruct.mLong = 4l;
+ outerStruct.mFloat = 5.01f;
+ outerStruct.mDouble = 6.001d;
+ outerStruct.mString = "seven";
+ InnerStruct is1 = new InnerStruct(null, 9l);
+ InnerStruct is2 = new InnerStruct(10, null);
+ outerStruct.mArray = new ArrayList<InnerStruct>(2);
+ outerStruct.mArray.add(is1);
+ outerStruct.mArray.add(is2);
+ outerStruct.mMap = new HashMap<String, InnerStruct>(); // HashMap here (other tests use TreeMap); presumably because a null key is inserted next
+ outerStruct.mMap.put(null, new InnerStruct(13, 14l));
+ outerStruct.mMap.put(new String("fifteen"), null);
+ outerStruct.mStruct = new InnerStruct(null, null);
+ BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
+
+ ObjectInspector out_oi = serde.getObjectInspector();
+ Object out_o = serde.deserialize(braw);
+ if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
+ System.out.println("expected = "
+ + SerDeUtils.getJSONString(outerStruct, oi));
+ System.out.println("actual = "
+ + SerDeUtils.getJSONString(out_o, out_oi));
+ fail("Deserialized object does not compare");
+ }
+ }
+
+
+}

Search Discussions

Related Discussions

Discussion Navigation
view: thread | post
Discussion Overview
group: commits @
categories: hive, hadoop
posted: Jul 26, '11 at 3:23a
active: Jul 26, '11 at 3:23a
posts: 1
users: 1
website: hive.apache.org

1 user in discussion

Heyongqiang: 1 post

People

Translate

site design / logo © 2021 Grokbase