FAQ
Author: namit
Date: Tue Oct 16 05:35:53 2012
New Revision: 1398654

URL: http://svn.apache.org/viewvc?rev=1398654&view=rev
Log:
HIVE-3581 get_json_object and json_tuple return null in the presence of new line characters
(Kevin Wilfong via namit)


Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java Tue Oct 16 05:35:53 2012
@@ -29,6 +29,8 @@ import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser.Feature;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.type.TypeFactory;
import org.codehaus.jackson.type.JavaType;
@@ -59,7 +61,12 @@ public class UDFJson extends UDF {
private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");

- private static final ObjectMapper MAPPER = new ObjectMapper();
+ private static final JsonFactory JSON_FACTORY = new JsonFactory();
+ static {
+ // Allows for unescaped ASCII control characters in JSON values
+ JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+ }
+ private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);

// An LRU cache using a linked hash map

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java Tue Oct 16 05:35:53 2012
@@ -35,6 +35,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser.Feature;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.type.TypeFactory;
import org.codehaus.jackson.type.JavaType;
@@ -51,7 +53,12 @@ public class GenericUDTFJSONTuple extend

private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());

- private static final ObjectMapper MAPPER = new ObjectMapper();
+ private static final JsonFactory JSON_FACTORY = new JsonFactory();
+ static {
+ // Allows for unescaped ASCII control characters in JSON values
+ JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+ }
+ private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);

int numCols; // number of output columns

Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q Tue Oct 16 05:35:53 2012
@@ -26,4 +26,15 @@ SELECT get_json_object(src_json.json, '$

SELECT get_json_object(src_json.json, '$.zip code') FROM src_json;

-SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json;
\ No newline at end of file
+SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json;
+
+
+-- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE;
+
+INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1;
+
+SELECT * FROM dest2;
+
+SELECT get_json_object(c1, '$.a') FROM dest2;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q Tue Oct 16 05:35:53 2012
@@ -34,3 +34,14 @@ explain
select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2;

select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2;
+
+
+-- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE;
+
+INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1;
+
+SELECT * FROM dest1;
+
+SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out Tue Oct 16 05:35:53 2012
@@ -173,3 +173,47 @@ POSTHOOK: Input: default@src_json
#### A masked pattern was here ####
POSTHOOK: Lineage: dest1.c1 SIMPLE []
1234
+PREHOOK: query: -- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+PREHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest2
+POSTHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+PREHOOK: query: SELECT * FROM dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+{"a":"b
+c"}
+PREHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+b
+c

Modified: hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out Tue Oct 16 05:35:53 2012
@@ -571,3 +571,51 @@ POSTHOOK: Lineage: json_t.key EXPRESSION
NULL 1
2 2
value2 1
+PREHOOK: query: -- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: SELECT * FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+{"a":"b
+c"}
+PREHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+b
+c

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupcommits @
categorieshive, hadoop
postedOct 16, '12 at 5:36a
activeOct 16, '12 at 5:36a
posts1
users1
websitehive.apache.org

1 user in discussion

Namit: 1 post

People

Translate

site design / logo © 2022 Grokbase