JavaMastar: Hive-Json Procession[semi structured data]

A json is a

{
"name":"manohar",
"gender":"male",
"color":"white",
"age":34
}

[cloudera@quickstart ~]$ cat > json1
{"name":"manohar","gender":"male","age":"34"}
{"name":"vidhatri","gender":"female","age":"1","city":"nellore"}
^C

hive> create database jsondatabase;
hive> use jsondatabase;
hive> create table person(line string);
hive> load data local inpath 'json1' into table person;
hive> select *from person;

hive:get_json_object
--------------------
This method is used to select a property/node from a json file/record in a hive table.

It takes 2 parameters
i)colname
ii)propertyname

syntax
------
get_json_object(colname,'propertyname');

$-->It is rootnode of json record.

hive> select get_json_object(line,'$.name') from person;

hive> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;

converting semistructured to structured in hive
-----------------------------------------------
we can split the properties of a json into cols of a hive table,which makes json strcuctured.

hive> create table personal_details(name string,age int,gender string,city string);

hive> insert overwrite table personal_details
> select
> get_json_object(line,'$.name')
> ,get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;

hive:json_tuple
---------------
A json_tuple is used to split raw data into cols/properties.

hive> create table personal_details(name string,gender string,age int,city string);

hive>insert ....
>select alisname.* from tablename
lateral view
json_tuple(colname,'prop1,'prop2',.....)
aliasname as name1,name2,.....;

hive> insert overwrite table personal_details
> select x.* from json_rawtable
> lateral view json_tuple
> (line,'name','gender','age','city')
> x as n,g,a,c;

Json with child node
--------------------
[cloudera@quickstart ~]$ cat > json2
{"name":"manohar","age":"34","address":{"state":"india","city":"hyd"},"gender":"male"}

hive> create table person_info(line string);
hive> load data local inpath 'json2' into table person_info;
hive> select *from person_info;

Loading data col wise
---------------------
hive> create table person_info_table(name string,age int,address string,gender string);

hive> insert overwrite table person_info_table
> select x.* from person_info
> lateral view
> json_tuple(line,'name','age','address','gender')x
> as n,a,addr,g;
hive> select *from person_info_table;

Loading child node by spitting col wise
---------------------------------------
hive> create table per_info(name string,age int,state string,city string,gender string);

hive> insert overwrite table json_strdata
> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.address.state'),
> get_json_object(line,'$.address.city'),
> get_json_object(line,'$.gender')
> from json_rawtable;

hive> insert overwrite table per_info
> select name,age,get_json_object(address,'$.state'),
> get_json_object(address,'$.city'),
> gender from person_info_table;

JavaMastar

Monday, June 19, 2017

Hive-Json Procession[semi structured data]

No comments:

Total Pageviews