Joins
-----
A Join is used to retrieve data from 2 or more tables based on a common columns
inner join
----------
Displays all the matching rows between 2 tables based on some criteria.
[cloudera@quickstart ~]$ cat > emp.txt
eno name sal g dno
101,AAAAA,1000.00,m,11
102,BBBBB,2000.00,m,12
103,CCCCC,3000.00,f,13
104,DDDDD,4000.00,f,14
105,EEEEE,5000.00,m,15
106,FFFFF,6000.00,f,16
107,GGGGG,7000.00,m,17
108,HHHHH,8000.00,m,18
109,xxxxx,1000.00,f,11
110,yyyyy,2000.00,f,18
111,zzzzz,2000.00,f,15
create 2 table
empdata
eno
name
sal
gender
dno
tax
dept
dno
dname
dloc
syntax
------
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 inner join tablename2 reference2 on condition;
hive> select e.eno,e.name,e.sal,e.dno,d.dname,d.dloc from emp_data e inner join dept d on e.dno=d.dno;
hive>create table dept(dno int,dname string,dloc string);
row format delimited fields terminated by ',';
hive>insert into dept values(11,'training','hyd');
hive>insert into dept values(13,'IT','hyd');
hive>insert into dept values(14,'Automobile','hyd');
hive>insert into dept values(15,'Finance','hyd');
hive> select *from dept;
OK
cat > dept.txt
11,training,hyd
13,IT,hyd
14,Automobile,hyd
16,Finance,hyd
Task:
Displays employee,dept details matching between both tables.
example:
hive>hive> select eno,name,sal,d.dno,d.name from empdata e join dept d on(e.dno=d.dno);
101 AAAAA 1000.0 11 training
103 CCCCC 3000.0 13 IT
104 DDDDD 4000.0 14 Automobile
106 FFFFF 6000.0 16 Finance
outer join
left outer join
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 left join tablename2 reference2 on condition;
hive> select e.eno,e.name,e.sal,d.name,d.dno,d.loc from empdata e left join dept d on(e.dno=d.dno);
OK
1 aaaa 1000.0 training 11 hyd
2 bbbb 2000.0 NULL NULL NULL
3 cccc 3000.0 finance 13 hyd
4 dddd 4000.0 hr 14 chennai
5 eeee 5000.0 training 11 hyd
6 ffff 2000.0 analyst 15 delhi
7 gggg 3000.0 finance 13 hyd
8 hhhh 4000.0 NULL NULL NULL
1 cccc NULL NULL NULL NULL
right outer join
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 right join tablename2 reference2 on condition;
hive> select eno,name,sal,d.dno,dname from empdata e right outer join dept d on(e.dno=d.dno);
full outer join
---------------
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 full outer join tablename2 reference2 on condition;
hive> select eno,name,sal,d.dno,dname from empdata e full outer join dept d on(e.dno=d.dno);
cross join
----------
cartesian product
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 cross join tablename2 reference2;
hive> select e.eno,e.name,e.sal,d.name,d.dno,d.loc from empdata e cross join dept d;
self join
---------
hive> select e.eno,e.name,e.sal from empdata e join empdata e1 on(e.eno=e1.eno);
Displays matching rows between two cols of same table.
We can apply both inner as well as outer join on self join.
-----
A Join is used to retrieve data from 2 or more tables based on a common columns
inner join
----------
Displays all the matching rows between 2 tables based on some criteria.
[cloudera@quickstart ~]$ cat > emp.txt
eno name sal g dno
101,AAAAA,1000.00,m,11
102,BBBBB,2000.00,m,12
103,CCCCC,3000.00,f,13
104,DDDDD,4000.00,f,14
105,EEEEE,5000.00,m,15
106,FFFFF,6000.00,f,16
107,GGGGG,7000.00,m,17
108,HHHHH,8000.00,m,18
109,xxxxx,1000.00,f,11
110,yyyyy,2000.00,f,18
111,zzzzz,2000.00,f,15
create 2 table
empdata
eno
name
sal
gender
dno
tax
dept
dno
dname
dloc
syntax
------
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 inner join tablename2 reference2 on condition;
hive> select e.eno,e.name,e.sal,e.dno,d.dname,d.dloc from emp_data e inner join dept d on e.dno=d.dno;
hive>create table dept(dno int,dname string,dloc string);
row format delimited fields terminated by ',';
hive>insert into dept values(11,'training','hyd');
hive>insert into dept values(13,'IT','hyd');
hive>insert into dept values(14,'Automobile','hyd');
hive>insert into dept values(15,'Finance','hyd');
hive> select *from dept;
OK
cat > dept.txt
11,training,hyd
13,IT,hyd
14,Automobile,hyd
16,Finance,hyd
Task:
Displays employee,dept details matching between both tables.
example:
hive>hive> select eno,name,sal,d.dno,d.name from empdata e join dept d on(e.dno=d.dno);
101 AAAAA 1000.0 11 training
103 CCCCC 3000.0 13 IT
104 DDDDD 4000.0 14 Automobile
106 FFFFF 6000.0 16 Finance
outer join
left outer join
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 left join tablename2 reference2 on condition;
hive> select e.eno,e.name,e.sal,d.name,d.dno,d.loc from empdata e left join dept d on(e.dno=d.dno);
OK
1 aaaa 1000.0 training 11 hyd
2 bbbb 2000.0 NULL NULL NULL
3 cccc 3000.0 finance 13 hyd
4 dddd 4000.0 hr 14 chennai
5 eeee 5000.0 training 11 hyd
6 ffff 2000.0 analyst 15 delhi
7 gggg 3000.0 finance 13 hyd
8 hhhh 4000.0 NULL NULL NULL
1 cccc NULL NULL NULL NULL
right outer join
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 right join tablename2 reference2 on condition;
hive> select eno,name,sal,d.dno,dname from empdata e right outer join dept d on(e.dno=d.dno);
full outer join
---------------
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 full outer join tablename2 reference2 on condition;
hive> select eno,name,sal,d.dno,dname from empdata e full outer join dept d on(e.dno=d.dno);
cross join
----------
cartesian product
select reference1.col1,refence1.col2,....reference2.col1,... from tablename1 reference1 cross join tablename2 reference2;
hive> select e.eno,e.name,e.sal,d.name,d.dno,d.loc from empdata e cross join dept d;
self join
---------
hive> select e.eno,e.name,e.sal from empdata e join empdata e1 on(e.eno=e1.eno);
Displays matching rows between two cols of same table.
We can apply both inner as well as outer join on self join.
No comments:
Post a Comment