Grokbase Groups Hive user March 2011
FAQ
Can anyone help me, or point me to some related documentation?

I want to load data from Nutch's sequence files.

My code is below.
The LOAD DATA script executes normally,
but when I run 'select * from table', an error occurs.
It reports a cast exception on Content at the line 'Iterator<Writable> values =
(Iterator<Writable>)blob;'.


Thanks for your help!

public class NutchSequenceFileSerDe implements SerDe {

public static final Log LOG =
LogFactory.getLog(NutchSequenceFileSerDe.class.getName());

int numColumns;
String inputRegex;
String outputFormatString;

Pattern inputPattern;

StructObjectInspector rowOI;
ArrayList<String> row;

@Override
public void initialize(Configuration conf, Properties tbl)
throws SerDeException {

// We can get the table definition from tbl.

// Read the configuration parameters

String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
String columnTypeProperty =
tbl.getProperty(Constants.LIST_COLUMN_TYPES);

List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
List<TypeInfo> columnTypes = TypeInfoUtils
.getTypeInfosFromTypeString(columnTypeProperty);
assert columnNames.size() == columnTypes.size();
numColumns = columnNames.size();

// All columns have to be of type STRING.
for (int c = 0; c < numColumns; c++) {
if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
throw new SerDeException(getClass().getName()
+ " only accepts string columns, but column[" + c + "] named "
+ columnNames.get(c) + " has type " + columnTypes.get(c));
}
}

// Constructing the row ObjectInspector:
// The row consists of some string columns, each column will be a java
// String object.
List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(
columnNames.size());
for (int c = 0; c < numColumns; c++) {

columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
// StandardStruct uses ArrayList to store the row.
rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
columnNames, columnOIs);

// Constructing the row object, etc, which will be reused for all rows.
row = new ArrayList<String>(numColumns);
for (int c = 0; c < numColumns; c++) {
row.add(null);
}
outputFields = new Object[numColumns];
outputRowText = new Text();
}

@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return rowOI;
}

@Override
public Class<? extends Writable> getSerializedClass() {
return Text.class;
}

// Number of rows not matching the regex
long unmatchedRows = 0;
long nextUnmatchedRows = 1;
// Number of rows that match the regex but have missing groups.
long partialMatchedRows = 0;
long nextPartialMatchedRows = 1;

long getNextNumberToDisplay(long now) {
return now * 10;
}

@Override
public Object deserialize(Writable blob) throws SerDeException {

Iterator<Writable> values = (Iterator<Writable>)blob;

//dump.append(key.toString() + "\001");
int colIndex =0;
while (values.hasNext()) {
try {
Writable value = values.next();//.get(); // unwrap
String vString = value.toString();
row.set(colIndex,vString);

} catch (RuntimeException e) {

row.set(colIndex, null);
}
colIndex ++;
}

return row;
}

Object[] outputFields;
Text outputRowText;

@Override
public Writable serialize(Object obj, ObjectInspector objInspector)
throws SerDeException {
// Get all the fields out.
// NOTE: The correct way to get fields out of the row is to use
// objInspector.
// The obj can be a Java ArrayList, or a Java class, or a byte[] or
// whatever.
// The only way to access the data inside the obj is through
// ObjectInspector.

StructObjectInspector outputRowOI = (StructObjectInspector)
objInspector;
List<? extends StructField> outputFieldRefs = outputRowOI
.getAllStructFieldRefs();
if (outputFieldRefs.size() != numColumns) {
throw new SerDeException("Cannot serialize the object because there
are "
+ outputFieldRefs.size() + " fields but the table has " +
numColumns
+ " columns.");
}

// Get all data out.
for (int c = 0; c < numColumns; c++) {
Object field = outputRowOI
.getStructFieldData(obj, outputFieldRefs.get(c));
ObjectInspector fieldOI = outputFieldRefs.get(c)
.getFieldObjectInspector();
// The data must be of type String
StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
// Convert the field to Java class String, because objects of String
type
// can be
// stored in String, Text, or some other classes.
outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
}

// Format the String
String outputRowString = "";
for (Object o : outputFieldRefs) {
outputRowString += o.toString();
}
outputRowText.set(outputRowString);
return outputRowText;
}

}

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupuser @
categorieshive, hadoop
postedMar 24, '11 at 6:36a
activeMar 24, '11 at 6:36a
posts1
users1
websitehive.apache.org

1 user in discussion

徐厚道: 1 post

People

Translate

site design / logo © 2021 Grokbase