Monday, June 12, 2017

Hive-Processing temperature dataset in hive

$cat > temp1
xxxxx2009xxx27xxxx
xxxxx2010xxx39xxxx
xxxxx2015xxx45xxxx
xxxxx2016xxx49xxxx
xxxxx2020xxx55xxxx
xxxxx2012xxx43xxxx
xxxxx2006xxx40xxxx


create table raw_temp(line string);

load data local inpath 'temp1' into table raw_temp;

In the above dataset get year,temperature and display
-----------------------------------------------------
we get temperature and year from a row by using the method substr()

syntax
------
substr(colname,position,length)

position-->From where the substr starts.
length-->no of characters in the substring.

substr(line,6,4)-->returns year from the above dataset.
substr(line,13,2)-->returns temperature from the above dataset.


hive>select substr(line,6,4),substr(line,13,2) from raw_temp;

output
------
2009 27
2010 39
2015 45
2016 49
2020 55
2012 43
2006 40

hive>select substr(line,0,5) from raw_temp;

copying data from table to table
--------------------------------
syntax
------
insert overwrite table destinationtablename
select *from sourcetablename;

cat > file1
1,2
3,4
5,6

hive> create table tab9(a int,b int)
row format delimited fields terminated by ',';

hive>load data local inpath 'file1' into table tab9;

hive> create table tab10(a int,b int)
row format delimited fields terminated by ',';

hive>insert overwrite table tab10
select *from tab9;

copying data from table to table with different order
-----------------------------------------------------
create  table tab11(a int,b int);


insert overwrite table tab11
select a,b from tab8;

hive>select *from tab11;


Filtering year,temparture from temperature datset
-------------------------------------------------
create table year_temp(y int,t int);

insert overwrite table year_temp
select substr(line,6,2),substr(line,13,2) from raw_temp;

select *from year_temp;

Processing employee dataset
---------------------------
cat > employee.txt
1,abc,1000.00,m,11
2,bbb,2000.00,f,12
3,ccc,2000.00,f,12
4,ddd,3000.00,m,14
5,eee,4000.00,f,15


create table emp(eno int,name string,sal double,gender string,dno int);

load data local inpath 'employee.txt' into table emp;

select *from emp;

tabl22
------
create table emp2(eno int,name string,sal double,gender string,dno int);

insert overwrite table emp2
select *from emp;

select *from emp2;


emp with 3 cols
--------------
create table emp3(eno int,name string,sal double);

insert overwrite table emp3
select eno,name,sal from emp;

select *from emp3;


Appedning data to a table using insert command
----------------------------------------------
Remove overwrite and use insert into command for appending data to a destination table from a source table.

insert into table emp3
select eno,name,sal from emp2;

select *from emp3;
1,
2,
..
..
1,..
2,..

temperature dataset
-------------------
cat > temp1.txt
xxxxx2009xxx27xxxx
xxxxx2010xxx39xxxx
xxxxx2015xxx-45xxxx
xxxxx2016xxx49xxxx
xxxxx2020xxx-55xxxx
xxxxx2012xxx43xxxx
xxxxx2006xxx-40xxxx

create rawtemp1(line string);

load data local inpath 'temp1.txt' into table rawtemp1;

select *from rawtemp1;

case1:displaying +ve temperatures
---------------------------------


case2:displaying -ve temperatures
---------------------------------


case3:Merging both +ve and -ve temperatures
-------------------------------------------
Merging results of 2 subqueries is done by using the clause unionall

union is not supported in hive

syntax
------
select *from(sql_query1
union all
sql_query2) temp_table_name;


















































































     
























































No comments: