cd /usr/local/hive/conf
mv hive-default.xml.template hive-default.xml
[hadoop@VM-24-13-centos conf]$ vi hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive1234</value>
    <description>must match the password set when creating the MySQL user below</description>
  </property>
</configuration>
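One step the transcript does not show: Hive can only reach MySQL if the JDBC driver named in javax.jdo.option.ConnectionDriverName is on Hive's classpath. A minimal sketch, with an assumed connector file name and version:

cp mysql-connector-java-8.0.28.jar /usr/local/hive/lib/   # jar name/version are placeholders; use the connector you actually downloaded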
Installing and configuring MySQL
Installing MySQL
MySQL is already installed on my machine, so this step is skipped.
Configuring MySQL
Create the hive database
[hadoop@VM-24-13-centos conf]$ mysql -u root -p
create database hive;  # this "hive" database matches the one in localhost:3306/hive in hive-site.xml; it stores Hive's metadata
Configure MySQL to allow Hive to connect
mysql> set global validate_password.policy='LOW';  # MySQL 8 enforces a password policy; lower it so a simple password is accepted
mysql> create user hive@localhost identified by 'hive1234';
# "hive" is the account name for the new database user
# "localhost" limits access to local connections only; other host values exist:
#   "%" is a wildcard matching every host (allows remote access)
#   a specific IP, e.g. 10.138.106.10, grants access from that host only
# "hive1234" is the new account's password
mysql> grant all privileges on *.* to 'hive'@'localhost';  # grant the databases to the hive user; the host must match the one used in create user
mysql> flush privileges;  # reload MySQL's privilege tables
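To confirm the account works before moving on, you can log in as the new user and inspect its grants (a quick sanity check, not from the original transcript):

[hadoop@VM-24-13-centos conf]$ mysql -u hive -p    # enter hive1234 at the prompt
mysql> show grants for 'hive'@'localhost';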
Configuring Hive
Starting Hive
Before starting Hive, start the Hadoop cluster first.
start-all.sh  # start Hadoop
hive          # start Hive
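If hive fails to come up, first confirm that the Hadoop daemons are actually running; jps (shipped with the JDK) lists the Java processes:

[hadoop@VM-24-13-centos conf]$ jps   # a typical single-node setup shows NameNode, DataNode, ResourceManager, NodeManager among others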
Troubleshooting
hive> show databases;  # this fails with the error below
FAILED: HiveException java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
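The original post does not show the resolution step; this error usually means the metastore schema was never initialized in MySQL. A common fix is to run Hive's schematool from the shell (not from the hive prompt):

schematool -dbType mysql -initSchema   # creates the metastore tables inside the hive MySQL database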
create database if not exists hive;  # create a database
show databases;                      # list the databases in Hive
show databases like 'h.*';           # list databases whose names start with h
use hive;                            # switch to the database
show tables;                         # list its tables
drop table usr;                      # drop a table
create view view_name as ...;                 # create a view
alter view view_name set tblproperties(...);  # modify a view
Because views are read-only, the only thing that may be changed on a view is the tblproperties attribute in its metadata.
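As a concrete sketch (using the stu table created later in this post; the view name and property key are made up for illustration):

hive> create view stu_names as select name from stu;
hive> alter view stu_names set tblproperties('created_for'='demo');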
# drop a view
drop view if exists view_name;
# create an index
create index index_name on table table_name(partition_name/column_name) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred rebuild ...;
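A filled-in instance of that template (index and table names are illustrative; "with deferred rebuild" means the index stays empty until it is rebuilt explicitly, and note that Hive dropped index support entirely in version 3.0):

hive> create index stu_id_idx on table stu(id) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred rebuild;
hive> alter index stu_id_idx on stu rebuild;   # actually populates the index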
create table if not exists hive.stu(id int,name string) row format delimited fields terminated by '\t';
create table if not exists hive.course(cid int,sid int) row format delimited fields terminated by '\t';
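Both tables expect tab-separated input files. A minimal sketch of what they could contain (the stu.txt rows are taken from the query output below; the course.txt rows are inferred from the join result further down):

echo -e "1\txiapi\n2\txiaoxue\n3\tqingqing" > /usr/local/hadoop/examples/stu.txt
echo -e "1\t2\n2\t1\n3\t1" > /usr/local/hadoop/examples/course.txt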
hive> use hive;
hive> load data local inpath '/usr/local/hadoop/examples/stu.txt' overwrite into table stu;
Loading data to table hive.stu
OK
# the data is now queryable
hive> select * from hive.stu;
OK
1	xiapi
2	xiaoxue
3	qingqing
Time taken: 1.324 s
hive> create table stu1 as select id,name from stu;
...
Moving data to directory hdfs://localhost:9000/user/hive/warehouse/hive.db/.hive-staging_hive_2021-12-10_10-42-32_320_1543053100774530944-1/-ext-10002
Moving data to directory hdfs://localhost:9000/user/hive/warehouse/hive.db/stu1
MapReduce Jobs Launched:
Stage-Stage-1: HDFS Read: 31 HDFS Write: 114 SUCCESS
Total MapReduce CPU Time Spent: 0 msec
OK
# the new stu1 table has this structure
hive> describe stu1;
OK
id    int
name  string
Time taken: 0.268 seconds, Fetched: 2 row(s)
The statement above creates a new table and fills it in one step; if the table already exists, insert data with the following command:
# the overwrite keyword replaces whatever the table (or partition) already holds;
# using into instead appends to the existing contents
hive> insert overwrite table stu1 select id,name from stu where(id='1');

# a query now finds only the id=1 row; everything else was wiped
hive> select * from hive.stu1;
OK
1	xiapi
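For contrast, a sketch of the append form with into (commands only; the id='2' filter is made up):

hive> insert into table stu1 select id,name from stu where(id='2');
hive> select * from stu1;   # now returns the id=1 row plus the appended id=2 row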
Exporting data from a table
Exporting to a local file
hive> insert overwrite local directory '/usr/local/hadoop/examples/export_stu' select * from hive.stu;
...
Moving data to local directory /usr/local/hadoop/examples/export_stu
MapReduce Jobs Launched:
Stage-Stage-1: HDFS Read: 30 HDFS Write: 0 SUCCESS
# inspect the exported file; the columns look glued together because the default
# field separator is the non-printing \001 (^A) character
[hadoop@VM-24-13-centos local]$ cat /usr/local/hadoop/examples/export_stu/000000_0
1xiapi
2xiaoxue
3qingqing
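To get a visible separator instead, the export statement can declare a row format explicitly (a sketch, not part of the original transcript; supported since Hive 0.11):

hive> insert overwrite local directory '/usr/local/hadoop/examples/export_stu' row format delimited fields terminated by '\t' select * from hive.stu;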
Exporting to HDFS
hive> insert overwrite directory '/usr/local/hadoop/examples/export_hdfs_stu' select * from hive.stu;
...
Moving data to directory /usr/local/hadoop/examples/export_hdfs_stu
MapReduce Jobs Launched:
Stage-Stage-1: HDFS Read: 30 HDFS Write: 30 SUCCESS
Total MapReduce CPU Time Spent: 0 msec
OK
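The exported data can be checked directly on HDFS (the 000000_0 file name mirrors the local export above and is an assumption):

[hadoop@VM-24-13-centos local]$ hdfs dfs -cat /usr/local/hadoop/examples/export_hdfs_stu/000000_0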
hive> select stu.*, course.* from stu right outer join course on (stu.id=course.sid);
Total MapReduce CPU Time Spent: 0 msec
OK
2	xiaoxue	1	2
1	xiapi	2	1
1	xiapi	3	1
Time taken: 10.887 seconds, Fetched: 3 row(s)
hive> create table docs(line string);
hive> load data inpath 'input' overwrite into table docs;
# create table word_count ... creates the result table
# as select word, count(1) as count ... is the select list: each word and its number of occurrences
# from (select explode(split(line,' ')) as word from docs) w ... splits every line of docs into words, one word per row
hive> create table word_count as select word, count(1) as count from (select explode(split(line,' ')) as word from docs) w group by word order by word;
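The subquery is where the work happens: split() turns each line into an array of words and explode() emits one row per array element. It can be run on its own to inspect the intermediate rows (an illustrative step, not in the original transcript):

hive> select explode(split(line,' ')) as word from docs;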
After the create table ... as select finishes, check the result with a select statement:
hive> select * from word_count;
OK
hadoop	1
hello	2
world	1
Time taken: 0.148 seconds, Fetched: 3 row(s)