hive具体操作

时间:2022-05-06
本文章向大家介绍hive具体操作,主要内容包括1 创建表、2 导入数据、3 分区、4 桶操作、5 多表插入、6 表操作、6.2 删除表、7 连接、7.2 左向外连接、7.3 右向外连接、7.4 全外连接、7.5 半连接、8 子查询操作、9 创建视图、基本概念、基础应用、原理机制和需要注意的事项等,并结合实例形式分析了其使用技巧,希望通过本文能帮助到大家理解应用这部分内容。

Hive对hadoop集群及数据库操作说明文档

1 创建表

hive> create table userinfo(id int, name string) row format delimited fields terminated by 't';
hive> create table choice(userid int, classname string) row format delimited fields terminated by 't';
hive> create table classinfo(teacher string, classname string) row format delimited fields terminated by 't';

2 导入数据

分别建立三个本地数据表单data1,data2,data3,然后导入到hive中,导入具体操作如下:

hive> load data local inpath '/home/hadoop/hive-data/data1' overwrite into table userinfo;
hive> load data local inpath '/home/hadoop/hive-data/data2' overwrite into table choice;                      
hive> load data local inpath '/home/hadoop/hive-data/data3' overwrite into table classinfo;
hive> select * from userinfo
    > ;
1       wangming
2       xiaoming
3       wujing
4       zhangqi
5       xiaomi
6       wangyi

3 分区

hive> create table ptest(userid int) partitioned by (name string) row format delimited fields terminated by 't';
hive> load data local inpath '/home/hadoop/hive-data/data1' into table ptest partition(name='jack');
hive> dfs -ls /user/hive/warehouse/ptest/name=jack;
Found 1 items
-rw-r--r--   2 hadoop supergroup         59 2012-08-29 19:50 /user/hive/warehouse/ptest/name=jack/data1
hive> select userid from ptest where name='jack';
1
2
3
4
5
6
hive> show partitions ptest;
name=jack
hive> insert overwrite table ptest partition(name='jack') select name from userinfo;

4 桶操作

hive> set hive.enforce.bucketing=true;
hive> set hive.enforce.bucketing;
hive.enforce.bucketing=true
hive> create table btest2(id int, name string) clustered by(id) into 3 buckets row format delimited fields terminated by 't';
hive> insert overwrite table btest2 select * from userinfo; 
hive> dfs -ls /user/hive/warehouse/btest2;
Found 3 items
-rw-r--r--   2 hadoop supergroup         18 2012-08-29 20:09 /user/hive/warehouse/btest2/000000_0
-rw-r--r--   2 hadoop supergroup         21 2012-08-29 20:09 /user/hive/warehouse/btest2/000001_0
-rw-r--r--   2 hadoop supergroup         20 2012-08-29 20:09 /user/hive/warehouse/btest2/000002_0
hive> dfs -cat /user/hive/warehouse/btest2/000001_0;
1       wangming
4       zhangqi
hive> dfs -cat /user/hive/warehouse/btest2/000000_0;
3       wujing
6       wangyi
hive> dfs -cat /user/hive/warehouse/btest2/000002_0;
2       xiaoming
5       xiaomi
hive> select * from btest2 tablesample(bucket 1 out of 3 on id);
3       wujing
6       wangyi

5 多表插入

hive> create table mutil1 as select id,name from userinfo;
hive> create table mutil2 like mutil1;
hive> from userinfo insert overwrite table mutil1 select id,name insert overwrite table mutil2 select count(distinct id),name group by name; 

6 表操作

6.1 修改表

hive> alter table mutil1 rename to mutil11;

6.2 删除表

hive> drop table mutil11;
OK
Time taken: 0.534 seconds
hive> show tables;
OK
btest2
choice
classinfo
mutil2
ptest
tt
userinfo
Time taken: 0.037 seconds

7 连接

7.1 内连接

hive> select * from userinfo;
OK
1       wangming
2       xiaoming
3       wujing
4       zhangqi
5       xiaomi
6       wangyi
Time taken: 0.052 seconds
hive> select * from choice;  
OK
1       class1
2       class2
3       class3
4       class4
5       class5
6       class6
Time taken: 0.05 seconds
hive> select userinfo.*,choice.* from userinfo join choice on (userinfo.id=choice.userid);
1       wangming        1       class1
2       xiaoming        2       class2
3       wujing  3       class3
4       zhangqi 4       class4
5       xiaomi  5       class5
6       wangyi  6       class6

7.2 左向外连接

hive> select userinfo.*,choice.* from userinfo left outer join choice on (userinfo.id=choice.userid);
1       wangming        1       class1
2       xiaoming        2       class2
3       wujing  3       class3
4       zhangqi 4       class4
5       xiaomi  5       class5
6       wangyi  6       class6

7.3 右向外连接

hive> select userinfo.*,choice.* from userinfo right outer join choice on (userinfo.id=choice.userid);
1       wangming        1       class1
2       xiaoming        2       class2
3       wujing  3       class3
4       zhangqi 4       class4
5       xiaomi  5       class5
6       wangyi  6       class6

7.4 全外连接

hive> select userinfo.* from userinfo full outer join choice on (userinfo.id=choice.userid);
1       wangming
2       xiaoming
3       wujing
4       zhangqi
5       xiaomi
6       wangyi

7.5 半连接

hive> select userinfo.* from userinfo left semi join choice on (userinfo.id=choice.userid);
1       wangming
2       xiaoming
3       wujing
4       zhangqi
5       xiaomi
6       wangyi

8 子查询操作

hive> select teacher,MAX(class_num) from (select teacher,count(classname) as class_num from classinfo group by teacher) subq group by teacher;
pro.li  1
pro.liu 1
pro.mi  1
pro.qi  1
pro.wang        1
pro.zhang       1

9 创建视图

hive> create view teacher_classnum as select teacher,count(classname) from classinfo group by teacher;