A json is a
{
"name":"manohar",
"gender":"male",
"color":"white",
"age":34
}
[cloudera@quickstart ~]$ cat > json1
{"name":"manohar","gender":"male","age":"34"}
{"name":"vidhatri","gender":"female","age":"1","city":"nellore"}
^C
hive> create database jsondatabase;
hive> use jsondatabase;
hive> create table person(line string);
hive> load data local inpath 'json1' into table person;
hive> select *from person;
hive:get_json_object
--------------------
This method is used to select a property/node from a json file/record in a hive table.
It takes 2 parameters
i)colname
ii)propertyname
syntax
------
get_json_object(colname,'propertyname');
$-->It is rootnode of json record.
hive> select get_json_object(line,'$.name') from person;
hive> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;
converting semistructured to structured in hive
-----------------------------------------------
we can split the properties of a json into cols of a hive table,which makes json strcuctured.
hive> create table personal_details(name string,age int,gender string,city string);
hive> insert overwrite table personal_details
> select
> get_json_object(line,'$.name')
> ,get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;
hive:json_tuple
---------------
A json_tuple is used to split raw data into cols/properties.
hive> create table personal_details(name string,gender string,age int,city string);
hive>insert ....
>select alisname.* from tablename
lateral view
json_tuple(colname,'prop1,'prop2',.....)
aliasname as name1,name2,.....;
hive> insert overwrite table personal_details
> select x.* from json_rawtable
> lateral view json_tuple
> (line,'name','gender','age','city')
> x as n,g,a,c;
Json with child node
--------------------
[cloudera@quickstart ~]$ cat > json2
{"name":"manohar","age":"34","address":{"state":"india","city":"hyd"},"gender":"male"}
hive> create table person_info(line string);
hive> load data local inpath 'json2' into table person_info;
hive> select *from person_info;
Loading data col wise
---------------------
hive> create table person_info_table(name string,age int,address string,gender string);
hive> insert overwrite table person_info_table
> select x.* from person_info
> lateral view
> json_tuple(line,'name','age','address','gender')x
> as n,a,addr,g;
hive> select *from person_info_table;
Loading child node by spitting col wise
---------------------------------------
hive> create table per_info(name string,age int,state string,city string,gender string);
hive> insert overwrite table json_strdata
> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.address.state'),
> get_json_object(line,'$.address.city'),
> get_json_object(line,'$.gender')
> from json_rawtable;
hive> insert overwrite table per_info
> select name,age,get_json_object(address,'$.state'),
> get_json_object(address,'$.city'),
> gender from person_info_table;
{
"name":"manohar",
"gender":"male",
"color":"white",
"age":34
}
[cloudera@quickstart ~]$ cat > json1
{"name":"manohar","gender":"male","age":"34"}
{"name":"vidhatri","gender":"female","age":"1","city":"nellore"}
^C
hive> create database jsondatabase;
hive> use jsondatabase;
hive> create table person(line string);
hive> load data local inpath 'json1' into table person;
hive> select *from person;
hive:get_json_object
--------------------
This method is used to select a property/node from a json file/record in a hive table.
It takes 2 parameters
i)colname
ii)propertyname
syntax
------
get_json_object(colname,'propertyname');
$-->It is rootnode of json record.
hive> select get_json_object(line,'$.name') from person;
hive> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;
converting semistructured to structured in hive
-----------------------------------------------
we can split the properties of a json into cols of a hive table,which makes json strcuctured.
hive> create table personal_details(name string,age int,gender string,city string);
hive> insert overwrite table personal_details
> select
> get_json_object(line,'$.name')
> ,get_json_object(line,'$.age'),
> get_json_object(line,'$.gender'),
> get_json_object(line,'$.city')
> from person;
hive:json_tuple
---------------
A json_tuple is used to split raw data into cols/properties.
hive> create table personal_details(name string,gender string,age int,city string);
hive>insert ....
>select alisname.* from tablename
lateral view
json_tuple(colname,'prop1,'prop2',.....)
aliasname as name1,name2,.....;
hive> insert overwrite table personal_details
> select x.* from json_rawtable
> lateral view json_tuple
> (line,'name','gender','age','city')
> x as n,g,a,c;
Json with child node
--------------------
[cloudera@quickstart ~]$ cat > json2
{"name":"manohar","age":"34","address":{"state":"india","city":"hyd"},"gender":"male"}
hive> create table person_info(line string);
hive> load data local inpath 'json2' into table person_info;
hive> select *from person_info;
Loading data col wise
---------------------
hive> create table person_info_table(name string,age int,address string,gender string);
hive> insert overwrite table person_info_table
> select x.* from person_info
> lateral view
> json_tuple(line,'name','age','address','gender')x
> as n,a,addr,g;
hive> select *from person_info_table;
Loading child node by spitting col wise
---------------------------------------
hive> create table per_info(name string,age int,state string,city string,gender string);
hive> insert overwrite table json_strdata
> select
> get_json_object(line,'$.name'),
> get_json_object(line,'$.age'),
> get_json_object(line,'$.address.state'),
> get_json_object(line,'$.address.city'),
> get_json_object(line,'$.gender')
> from json_rawtable;
hive> insert overwrite table per_info
> select name,age,get_json_object(address,'$.state'),
> get_json_object(address,'$.city'),
> gender from person_info_table;
No comments:
Post a Comment