Ever tried , Ever failed !
No matter ,Try again ! Fail again , Fail better !

hive高级命令

1. Hive的Join操作

准备工作

(1)创建表

# studenta表
create table if not exists studenta(
id int,
name string)
row format delimited fields terminated by '\t';
# studentb表
create table if not exists studentb(
id int,
age int)
row format delimited fields terminated by '\t'

(2)导入数据

load data local inpath '/home/lemenness/Desktop/data/studenta.txt' into table studenta;
load data local inpath '/home/lemenness/Desktop/data/studentb.txt' into table studentb;

数据如下:

studenta.txt                    studentb.txt

10001   shiny                   10001   23  
10002   mark                    10004   22
10003   angel                   10007   24
10005   ella                    10008   21
10009   jack                    10009   25
10014   eva                     10012   25
10018   judy                    10015   20
10020   cendy                   10018   19
                                10020   26

数据之间用"\t"分割

Join详解

(1)inner join:内连接(把符合两边连接条件的数据查询出来)

select * from studenta a join studentb b on a.id=b.id;

运行结果:

10001   shiny   10001   23
10009   jack    10009   25
10018   judy    10018   19
10020   cendy   10020   26

(2)left outer join:左外连接(以左表的数据为匹配标准,左大右小,返回的数据条数与左表相同。)

select * from studenta a left join studentb b on a.id=b.id;

运行结果:

10001   shiny   10001   23
10002   mark    NULL    NULL
10003   angel   NULL    NULL
10005   ella    NULL    NULL
10009   jack    10009   25
10014   eva     NULL    NULL
10018   judy    10018   19
10020   cendy   10020   26

(3)right outer join:右外连接(以右表的数据为匹配标准,左小右大,返回的数据条数与右表相同。)

select * from studenta a right join studentb b on a.id=b.id;

运行结果:

10001   shiny   10001   23
NULL    NULL    10004   22
NULL    NULL    10007   24
NULL    NULL    10008   21
10009   jack    10009   25
NULL    NULL    10012   25
NULL    NULL    10015   20
10018   judy    10018   19
10020   cendy   10020   26

(4)full outer join:全外连接(以两个表的数据为匹配标准,返回的数据条数等于两表数据去重之和。)

select * from studenta a full join studentb b on a.id=b.id;

运行结果:

10001   shiny   10001   23
10002   mark    NULL    NULL
10003   angel   NULL    NULL
NULL    NULL    10004   22
10005   ella    NULL    NULL
NULL    NULL    10007   24
NULL    NULL    10008   21
10009   jack    10009   25
NULL    NULL    10012   25
10014   eva     NULL    NULL
NULL    NULL    10015   20
10018   judy    10018   19
10020   cendy   10020   26

(5)left semi join:左半连接(把符合两边连接条件的左表的数据显示出来。没有右半连接)

select * from studenta a left semi join studentb b on a.id=b.id;

运行结果:

10001   shiny
10009   jack
10018   judy
10020   cendy

2.数据类型

复杂数据类型

(1)array

创建表

create table if not exists employee(
name string,
age int,
work_locations array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','; -- array中的各元素的分隔符

导入数据

load data local inpath '/home/lemenness/data/array.txt'
into table employee;

数据如下:

shiny   23  beijing,tianjin,qingdao
jack    34  shanghai,guangzhou
mark    26  beijing,xian
ella    21  beijing
judy    30  shanghai,hangzhou,chongqing
cendy   28  beijing,shanghai,dalian,chengdu

查询数据1:

select * from employee;

运行结果:

shiny   23      ["beijing","tianjin","qingdao"]
jack    34      ["shanghai","guangzhou"]
mark    26      ["beijing","xian"]
ella    21      ["beijing"]
judy    30      ["shanghai","hangzhou","chongqing"]
cendy   28      ["beijing","shanghai","dalian","chengdu"]

查询数据2:

select work_locations from employee;

运行结果:

["beijing","tianjin","qingdao"]
["shanghai","guangzhou"]
["beijing","xian"]
["beijing"]
["shanghai","hangzhou","chongqing"]
["beijing","shanghai","dalian","chengdu"]

查询数据3:

select work_locations[0] from employee;

运行结果:

beijing
shanghai
beijing
beijing
shanghai
beijing

(2)map

创建表

create table if not exists scores(
name string,
course map<string,int>)
row format delimited
fields terminated by '\t'
collection items terminated by ','  --Map中的key/value对之间的分隔符
map keys terminated by ':'; --Map中key与value的分隔符

导入数据

load data local inpath '/home/lemenness/data/scores.txt' into table scores;

数据如下:

shiny   chinese:90,math:100,english:99
mark    chinese:89,math:56,english:87
judy    chinese:94,math:78,english:81
ella    chinese:54,math:23,english:48
jack    chinese:100,math:95,english:69
cendy   chinese:67,math:83,english:45

查询数据

select * from scores;

运行结果:

shiny   {"chinese":90,"math":100,"english":99}
mark    {"chinese":89,"math":56,"english":87}
judy    {"chinese":94,"math":78,"english":81}
ella    {"chinese":54,"math":23,"english":48}
jack    {"chinese":100,"math":95,"english":69}
cendy   {"chinese":67,"math":83,"english":45}

查询数据

select course from scores;

运行结果:

{"chinese":90,"math":100,"english":99}
{"chinese":89,"math":56,"english":87}
{"chinese":94,"math":78,"english":81}
{"chinese":54,"math":23,"english":48}
{"chinese":100,"math":95,"english":69}
{"chinese":67,"math":83,"english":45}

查询数据

select name,course['math'] from scores;

运行结果:

shiny   100
mark    56
judy    78
ella    23
jack    95
cendy   83

(3)struct

创建表

create table if not exists course_score(
id int,
courses struct<course:string,score:int>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';  --struct各元素之间的分隔符

导入数据

load data local inpath '/home/lemenness/data/coursescore.txt'
into table course_score;

数据如下:

1   chinese,100
2   math,98
3   english,99
4   computer,78

查询数据

select * from course_score;

运行结果:

1       {"course":"chinese","score":100}
2       {"course":"math","score":98}
3       {"course":"english","score":99}
4       {"course":"computer","score":78}

查询数据:

select courses from course_score;

运行结果:

{"course":"chinese","score":100}
{"course":"math","score":98}
{"course":"english","score":99}
{"course":"computer","score":78}

查询数据:

select courses.course from course_score;

运行结果:

chinese
math
english
computer

查询数据:

select courses.score from course_score;

运行结果:

100
98
99
78
赞(2) 打赏
未经允许不得转载:Mr. Almost的个人博客 » hive高级命令

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

谢谢老板~

支付宝扫一扫打赏

微信扫一扫打赏