Compiling and Installing Apache Hadoop from Source

Installing an Apache Hadoop 2.7.2 cluster

These are my notes from compiling and installing Hadoop, shared here for reference.

Environment:

OS: CentOS 7
Java: 1.8
Apache Hadoop version: 2.7.2

Hadoop cluster hosts list
IP hostname services
192.168.77.158 namenode00.host-shining.com namenode, zk, journalnode, standby-resourcemanager, hbase-master, spark-master
192.168.77.159 namenode01.host-shining.com namenode, zk, journalnode, resourcemanager, hbase-master, jobhistory
192.168.77.161 datanode00.host-shining.com datanode, nodemanager, zk, journalnode
192.168.77.162 datanode01.host-shining.com datanode, nodemanager, zk, journalnode
192.168.77.163 datanode02.host-shining.com datanode, nodemanager, zk, journalnode
192.168.77.164 datanode03.host-shining.com datanode, nodemanager, regionserver, spark-worker
192.168.77.165 datanode04.host-shining.com datanode, nodemanager, regionserver, spark-worker
192.168.77.166 datanode05.host-shining.com datanode, nodemanager, regionserver, spark-worker
192.168.77.167 datanode06.host-shining.com datanode, nodemanager, regionserver, spark-worker
192.168.77.168 datanode07.host-shining.com datanode, nodemanager
192.168.77.169 datanode08.host-shining.com datanode, nodemanager
192.168.77.170 datanode09.host-shining.com datanode, nodemanager

Add these entries to the hosts file and sync it to every machine.

Hadoop client list
IP hostname
192.168.77.160 client01.host-shining.com
Install packages and lzop (run on every machine)
yum -y install wget gcc gcc-c++ gcc-g77 autoconf automake zlib* flex* libxml* ncurses-devel libmcrypt* libtool-ltdl-devel* make cmake bind-utils ntp ntpdate lrzsz rsync gzip unzip vim telnet openssl-devel nscd g++ sysstat ncurses-libs bzip2-devel git lsof expect

yum -y install *gcc* ncurses-devel openssl-devel cmake autoconf automake libtool bzip2-devel g++ zlib1g-dev pkg-config

yum -y install lzo-devel zlib-devel gcc autoconf automake libtool lzop subversion psmisc nc
Install JDK 1.8 (run on every machine)
tar -zxf jdk-8u45-linux-x64.tar.gz && mkdir /usr/java/ && mv  jdk1.8.0_45/ /usr/java/
echo -e "export JAVA_HOME=/usr/java/jdk1.8.0_45\nexport PATH=\$JAVA_HOME/bin:\$JAVA_HOME/jre/bin:\$PATH" >> /etc/profile
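A quick sanity check (assuming the /etc/profile change above has been made) to confirm the JDK is picked up on each machine:

source /etc/profile
java -version        # should report java version "1.8.0_45"
echo $JAVA_HOME      # should print /usr/java/jdk1.8.0_45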
Create the hadoop user and set up passwordless login (run on every node)
useradd hadoop
echo "hadoop:xiaoaojianghu" | chpasswd    # chpasswd expects user:password and must run as root
su - hadoop

Create a script to distribute the SSH key; after the user has been added on every machine, run sh key_add.sh.

vim key_add.sh

#!/bin/bash
source /etc/profile
# generate the ssh key pair
expect -c 'set timeout -1;
spawn ssh-keygen -t rsa;
expect "Enter file in which to save the key";
send "\n";
expect "(empty for no passphrase):"
send "\n";
expect "Enter same passphrase again:";
send "\n";
interact'
# pick the namenode/datanode hostnames out of the hosts file
for ip in `cat /etc/hosts | grep -v "^#" |grep -E "name|data"|awk '{print $2}'`
do
echo $ip
# add hadoop pub key
expect -c 'set timeout -1;
spawn ssh-copy-id -i .ssh/id_rsa.pub '$ip';
expect "Are you sure you want to continue connecting (yes/no)?";
send "yes\n";
expect "password:";
send "xiaoaojianghu\n";
interact'

done

for ip in `cat /etc/hosts | grep -E "name|data"|awk '{print $1}'`;do echo $ip ;ssh-copy-id -i ~/.ssh/id_rsa.pub $ip;done
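Optionally, a quick check that key distribution worked; this sketch reuses the same hosts-file filter and should print every hostname without a password prompt when run as the hadoop user:

for ip in `cat /etc/hosts | grep -v "^#" | grep -E "name|data" | awk '{print $2}'`
do
  ssh -o BatchMode=yes $ip hostname   # BatchMode fails fast instead of prompting for a password
done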
Format the data disks on the datanodes (run on all datanode nodes)
vim fdisk.sh

#!/bin/bash
source /etc/profile
yum install -y expect parted

#for letter in b c d e f g h i j k l m # default disk list
for letter in `fdisk -l | grep 4000 | awk '{print $2}' | cut -c 8 | sort` # find the 4TB disks and format them
do
expect -c 'set timeout -1;
spawn parted /dev/sd'$letter';
expect "(parted)";
send "mklabel gpt\n";
expect "(parted)";
send "unit GB\n";
expect "(parted)";
send "mkpart primary 0 -1\n";
expect "(parted)";
send "quit\n";
interact'

nohup mkfs.xfs /dev/sd${letter}1 > sd$letter.out 2>&1 &
done
Mount the disks (run on all datanode nodes)
vim mount.sh

#!/bin/bash
source /etc/profile

blkid | sort | grep -v sdm| awk -F '"' '{print $2}' > /tmp/uuid
echo "/data00
/data01
/data02
/data03
/data04
/data05
/data06
/data07
/data08
/data09
/data10
/data11" > /tmp/dir

for dir in `cat /tmp/dir`
do
mkdir -p $dir
done
# or simply:
# mkdir /data{00..11}

cp /etc/fstab /etc/fstab_backup   # back up fstab once, before any entries are appended
l=$(cat /tmp/uuid | wc -l)
for ((i=1;i<=$l;i++))
do
u=$(sed -n "$i"p /tmp/uuid)
d=$(sed -n "$i"p /tmp/dir)
mount UUID=$u $d
echo -e "UUID=$u\t$d\t\txfs\tdefaults,noatime,nodiratime\t0 0" >> /etc/fstab
# noatime,nodiratime disable access-time updates
done
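A quick verification sketch after running mount.sh (paths as above):

df -h | grep /data            # each /dataNN should show a mounted xfs filesystem
grep /data /etc/fstab         # confirm the appended fstab entries
mount -a                      # should re-process fstab without errors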
Download the software packages
mkdir -p /home/hadoop/apache-hadoop && cd /home/hadoop/apache-hadoop
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/stable-2/apache-hive-2.1.0-bin.tar.gz
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/stable-2/apache-hive-2.1.0-src.tar.gz
wget http://apache.fayea.com/hbase/stable/hbase-1.1.5-bin.tar.gz
wget http://apache.fayea.com/hbase/stable/hbase-1.1.5-src.tar.gz
wget http://apache.fayea.com/mahout/0.12.2/apache-mahout-distribution-0.12.2.tar.gz
wget http://apache.fayea.com/zookeeper/stable/zookeeper-3.4.8.tar.gz
wget http://mirror.bit.edu.cn/apache/spark/spark-1.6.2/spark-1.6.2.tgz
Install ZooKeeper
tar -zxvf zookeeper-3.4.8.tar.gz && ln -s zookeeper-3.4.8 zookeeper
cd /home/hadoop/apache-hadoop/zookeeper && mkdir -p var/{data,datalog}
cd conf
echo "JAVA_HOME=/usr/java/jdk1.8.0_45" > java.env
echo "export JAVA_OPTS=\"-Xms1000m -Xmx1000m\"" >> java.env
cp zoo_sample.cfg zoo.cfg
sed -i '/^dataDir=/c dataDir=/home/hadoop/apache-hadoop/zookeeper/var/data' zoo.cfg
echo "dataLogDir=/home/hadoop/apache-hadoop/zookeeper/var/datalog" >> zoo.cfg
echo "maxClientCnxns=300" >> zoo.cfg

Add to zoo.cfg:
server.1=namenode00.host-shining.com:2888:3888
server.2=namenode01.host-shining.com:2888:3888
server.3=datanode00.host-shining.com:2888:3888
server.4=datanode01.host-shining.com:2888:3888
server.5=datanode02.host-shining.com:2888:3888

echo 1 > /home/hadoop/apache-hadoop/zookeeper/var/data/myid
Number the nodes in order; the myid file is different on each machine: 1, 2, 3, 4, 5.
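To avoid setting myid by hand on every node, a minimal sketch (assuming hostname -f returns the same FQDN used in the server.N lines of zoo.cfg) that derives the id from zoo.cfg:

host=$(hostname -f)
grep "^server\." /home/hadoop/apache-hadoop/zookeeper/conf/zoo.cfg | grep "$host" \
  | awk -F'[.=]' '{print $2}' > /home/hadoop/apache-hadoop/zookeeper/var/data/myid
cat /home/hadoop/apache-hadoop/zookeeper/var/data/myid   # should print this node's id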

Run ZooKeeper under supervisor (supervisor needs to reload its configuration afterwards)


[program:zookeeper]
command = /home/hadoop/apache-hadoop/zookeeper/bin/zkServer.sh start-foreground
autostart = true
autorestart = true
startsecs = 3
startretries = 3
stopwaitsecs = 5
user = hadoop
redirect_stderr = true
stdout_logfile = /home/shining/logs/supervisor/zookeeper.log
stdout_logfile_maxbytes = 500MB
stdout_logfile_backups = 5
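Assuming the snippet above is dropped into supervisord's include directory (for example /etc/supervisord.d/zookeeper.ini) and the log directory exists, reloading supervisor picks it up:

mkdir -p /home/shining/logs/supervisor
supervisorctl reread              # read the new program definition
supervisorctl update              # start zookeeper under supervision
supervisorctl status zookeeper    # should report RUNNING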

Install HDFS

Create directories
cd /home/hadoop/apache-hadoop
tar -zxvf hadoop-2.7.2.tar.gz && ln -s hadoop-2.7.2 hadoop
mkdir -p /home/hadoop/apache-hadoop/hadoop/var/dfs/jn
mkdir -p /home/hadoop/apache-hadoop/hadoop/var/dfs/dn
mkdir -p /home/hadoop/apache-hadoop/hadoop/var/dfs/nn
mkdir -p /home/hadoop/apache-hadoop/hadoop/var/run/hadoop-hdfs
mkdir -p /home/hadoop/apache-hadoop/hadoop/var/run/hadoop-hdfs/dn_PORT
chmod -R 755 /home/hadoop/apache-hadoop/hadoop/var/
Edit the configuration files
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>dfs.nameservices</name>
<value>shininghadoop</value>
</property>
<property>
<name>dfs.ha.namenodes.shininghadoop</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.shininghadoop.nn1</name>
<value>namenode00.host-shining.com:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.shininghadoop.nn2</name>
<value>namenode01.host-shining.com:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.shininghadoop.nn1</name>
<value>namenode00.host-shining.com:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.shininghadoop.nn2</name>
<value>namenode01.host-shining.com:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://namenode00.host-shining.com:8485;namenode01.host-shining.com:8485;datanode00.host-shining.com:8485;datanode01.host-shining.com:8485;datanode02.host-shining.com:8485/shininghadoop</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/dfs/jn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data00/data,/data01/data,/data02/data,/data03/data,/data04/data,/data05/data,/data06/data,/data07/data,/data08/data,/data09/data,/data10/data,/data11/data</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/dfs/nn</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.shininghadoop</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hadoop)
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>60000</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
<description>Default block replication. The actual number of replications can be specified when the file is created. The default is used if replication is not specified in create time</description>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>60</value>
</property>
<property>
<name>dfs.datanode.balance.bandwidthPerSec</name>
<value>20971520</value>
<final>true</final>
</property>
<property>
<name>dfs.block.size</name>
<value>67108864</value>
<final>true</final>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.xcievers</name>
<value>8192</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/run/hadoop-hdfs/dn_PORT</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.skip.checksum</name>
<value>false</value>
</property>

<property>
<name>dfs.ha.automatic-failover.enabled.appcluster</name>
<value>true</value>
</property>

<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
</property>

</configuration>
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>fs.defaultFS</name>
<value>hdfs://shininghadoop</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>10080</value>
<description>Number of minutes between trash checkpoints. If zero, the trash feature is disabled.</description>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>namenode00.host-shining.com:2181,namenode01.host-shining.com:2181,datanode00.host-shining.com:2181,datanode01.host-shining.com:2181,datanode02.host-shining.com:2181</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>10080</value>
</property>
<property>
<name>io.native.lib.available</name>
<value>true</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
</configuration>
hadoop-env.sh (comment lines removed)
export JAVA_HOME=/usr/java/jdk1.8.0_45
export HADOOP_HOME=/home/hadoop/apache-hadoop/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
JVM_OPTS="-server -verbose:gc
-XX:+PrintGCDateStamps
-XX:+PrintGCDetails
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=9
-XX:GCLogFileSize=20m"
export HADOOP_NAMENODE_OPTS="-Xmx40g -Xms10g -Xmn4g $JVM_OPTS -XX:ErrorFile=$HADOOP_LOG_DIR/nn_error_gc.log -Xloggc:$HADOOP_LOG_DIR/nn_gc.log -XX:HeapDumpPath=$HADOOP_LOG_DIR/nn_error.hprof"
export HADOOP_DATANODE_OPTS="-Xmx4g -Xms512m $JVM_OPTS -XX:ErrorFile=$HADOOP_LOG_DIR/dn_error_gc.log -Xloggc:$HADOOP_LOG_DIR/dn_gc.log -XX:HeapDumpPath=$HADOOP_LOG_DIR/dn_error.hprof "
export HADOOP_JOB_HISTORYSERVER_OPTS="-Xmx4g -Xms2g $JVM_OPTS -XX:ErrorFile=$HADOOP_LOG_DIR/log_error_gc.log -Xloggc:$HADOOP_LOG_DIR/log_gc.log -XX:HeapDumpPath=$HADOOP_LOG_DIR/log_error.hprof "
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx2048m $HADOOP_PORTMAP_OPTS"
export HADOOP_CLIENT_OPTS="-Xmx2048m $HADOOP_CLIENT_OPTS"
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_IDENT_STRING=$USER
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:/usr/lib64:/usr/local/lib/
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>namenode01.host-shining.com:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>namenode01.host-shining.com:19888</value>
</property>
<property>
<name>mapreduce.task.tmp.dir</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/yarn/task</value>
</property>

<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>4096</value>
</property>

<property>
<name>mapreduce.map.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx3400m</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx3400m</value>
</property>
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
</property>
<property>
<name>mapred.map.output.compression.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapred.child.env</name>
<value>LD_LIBRARY_PATH=/usr/lib64</value>
</property>

<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>
<property>
<name>mapreduce.job.ubertask.maxmaps</name>
<value>9</value>
</property>
<property>
<name>mapreduce.job.ubertask.maxreduces</name>
<value>1</value>
</property>
<!-- default is one HDFS block size -->
<property>
<name>mapreduce.job.ubertask.maxbytes</name>
<value>67108864</value>
</property>

</configuration>
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>shininghadoop-yarn</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>namenode00.host-shining.com</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>namenode01.host-shining.com</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>namenode00.host-shining.com:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>namenode01.host-shining.com:8088</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>namenode00.host-shining.com:2181,namenode01.host-shining.com:2181,datanode00.host-shining.com:2181,datanode01.host-shining.com:2181,datanode02.host-shining.com:2181</value>
</property>

<!-- <property>
<name>yarn.resourcemanager.hostname</name>
<value>namenode01.host-shining.com</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>namenode01.host-shining.com:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>namenode01.host-shining.com:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>namenode01.host-shining.com:8030</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>namenode01.host-shining.com:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>namenode01.host-shining.com:8088</value>
</property> -->

<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/yarn/local-dir</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/yarn/logs</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/yarn/logs</value>
</property>
<property>
<name>yarn.log.aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>83968</value>
<description>Memory available to containers on each node, in MB</description>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>18</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/home/hadoop/apache-hadoop/hadoop/var/yarn/logs/apps</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
$YARN_HOME/share/hadoop/yarn/*,
$YARN_HOME/share/hadoop/yarn/lib/*,
$YARN_HOME/share/hadoop/mapreduce/*,
$YARN_HOME/share/hadoop/mapreduce/lib/*
</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>/home/hadoop/apache-hadoop/hadoop/etc/hadoop/fair-scheduler.xml</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://namenode01.host-shining.com:19888/jobhistory/logs</value>
</property>

<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>16384</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>6</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>1.8</value>
</property>

<property>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property>
</configuration>
yarn-env.sh
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
export HADOOP_HOME=/home/hadoop/apache-hadoop/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
export YARN_LOG_DIR=${HADOOP_HOME}/logs
JVM_OPTS="-server -verbose:gc
-XX:+PrintGCDateStamps
-XX:+PrintGCDetails
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=9
-XX:GCLogFileSize=256m"
RESOURCEMANAGER_OPTS="-Xmx30g -Xms5g -Xmn2g $JVM_OPTS -Xloggc:$YARN_LOG_DIR/rm_gc.log"
NODEMANAGER_OPTS="-Xmx2048m -Xms1024m -Xmn512m $JVM_OPTS -Xloggc:$YARN_LOG_DIR/nm_gc.log"
export JAVA_HOME=/usr/java/jdk1.8.0_45
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi

if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx4096m
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
IFS=
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
unset IFS
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
slaves file
datanode00.host-shining.com
datanode01.host-shining.com
datanode02.host-shining.com
datanode03.host-shining.com
datanode04.host-shining.com
datanode05.host-shining.com
datanode06.host-shining.com
datanode07.host-shining.com
datanode08.host-shining.com
datanode09.host-shining.com
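The Hadoop configuration files above must be identical on every node. A minimal sync sketch (assuming the directory layout used here and passwordless ssh for the hadoop user):

for host in namenode01.host-shining.com `cat /home/hadoop/apache-hadoop/hadoop/etc/hadoop/slaves`
do
  rsync -av /home/hadoop/apache-hadoop/hadoop/etc/hadoop/ $host:/home/hadoop/apache-hadoop/hadoop/etc/hadoop/
done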
Install Apache Maven
cd /home/hadoop
wget http://mirror.bit.edu.cn/apache/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
tar -zxvf apache-maven-3.3.9-bin.tar.gz
Set environment variables (configure on every machine)
export MAVEN_OPTS="-Xms1024m -Xmx1024m -Xss1m"

export OOZIE_HOME=/home/hadoop/apache-hadoop/oozie
export MAVEN_HOME=/home/hadoop/apache-maven-3.3.9
export JAVA_HOME=/usr/java/jdk1.8.0_45
export ZK_HOME=/home/hadoop/apache-hadoop/zookeeper
export HADOOP_HOME=/home/hadoop/apache-hadoop/hadoop
export HBASE_HOME=/home/hadoop/apache-hadoop/hbase
export HIVE_HOME=/home/hadoop/apache-hadoop/hive
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native
export SCALA_HOME=/home/hadoop/apache-hadoop/scala
export MAHOUT_HOME=/home/hadoop/apache-hadoop/mahout
export MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export SPARK_HOME=/home/hadoop/apache-hadoop/spark

export PATH=${JAVA_HOME}/bin:${JAVA_HOME}/jre/bin:${ZK_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HBASE_HOME}/bin:$MAHOUT_HOME/bin:${HIVE_HOME}/bin:${SCALA_HOME}/bin:${SPARK_HOME}/bin:${MAVEN_HOME}/bin:${OOZIE_HOME}/bin:$PATH
export classpath=$JAVA_HOME/lib/dt.jar:$HBASE_HOME/lib:$MAHOUT_HOME/lib:$PIG_HOME/lib:$HIVE_HOME/lib:$JAVA_HOME/lib/tools.jar:$HADOOP_CONF_DIR:$SPARK_HOME/lib:$HBASE_HOME/lib/native/Linux-amd64-64:/usr/local/lib:$HADOOP_HOME/lib/native
export HBASE_LIBRARY_PATH=${HBASE_LIBRARY_PATH}:${HBASE_HOME}/lib/native/Linux-amd64-64:/usr/local/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/Linux-amd64-64:$HADOOP_HOME/lib/native:/usr/local/lib
Install protobuf
tar -zxvf protobuf-2.5.0.tar.gz   (install on every machine)
cd protobuf-2.5.0
./configure (run as root)
make (run as root)
make install (run as root)
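After make install (protobuf installs into /usr/local by default), verify protoc is found:

ldconfig                 # refresh the linker cache so /usr/local/lib is picked up (run as root)
protoc --version         # should print: libprotoc 2.5.0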
Build the HDFS source to get the native libraries (Maven must be installed before building; downloading, extracting, and setting the environment variables is enough, and it does not need to be installed on every machine)
wget http://apache.fayea.com/hadoop/common/hadoop-2.7.2/hadoop-2.7.2-src.tar.gz
tar -zxvf hadoop-2.7.2-src.tar.gz
cd hadoop-2.7.2-src
mvn package -Pdist,native -DskipTests -Dtar
cp -a hadoop-dist/target/hadoop-2.7.2/lib/native/* ~/apache-hadoop/hadoop/lib/native/
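After copying the native libraries, hadoop checknative reports which of them the runtime actually loads (it covers zlib, snappy, lz4, bzip2 and openssl; LZO is handled separately by the hadoop-lzo build below):

hadoop checknative -a    # lines reporting "true" confirm the corresponding native library is loaded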
Build LZO compression support
Download hadoop-lzo: https://codeload.github.com/twitter/hadoop-lzo/zip/master
unzip hadoop-lzo-master.zip
cd hadoop-lzo-master
vim pom.xml (set the Hadoop version)
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.current.version>2.7.2</hadoop.current.version>
<hadoop.old.version>1.0.4</hadoop.old.version>
</properties>

export CFLAGS=-m64
export CXXFLAGS=-m64
mvn clean package -Dmaven.test.skip=true
cp target/native/Linux-amd64-64/lib/* ~/apache-hadoop/hadoop/lib/native/
cp target/hadoop-lzo-0.4.20-SNAPSHOT.jar ~/apache-hadoop/hadoop/share/hadoop/common/lib/

Install HBase

tar -zxvf  hbase-1.2.2-bin.tar.gz && ln -s hbase-1.2.2 hbase
cd hbase/conf
cp $HADOOP_HOME/etc/hadoop/core-site.xml $HBASE_HOME/conf/
cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml $HBASE_HOME/conf/
Enable LZO for HBase
cp -a $HADOOP_HOME/lib/native $HBASE_HOME/lib
Edit the configuration files
hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>

<property>
<name>hbase.rootdir</name>
<value>hdfs://shininghadoop/hbase</value>
</property>
<property>
<name>hbase.rest.port</name>
<value>60050</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/home/hadoop/apache-hadoop/hbase/tmp</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>namenode00.host-shining.com,namenode01.host-shining.com,datanode00.host-shining.com,datanode01.host-shining.com,datanode02.host-shining.com</value>
</property>
<property>
<name>hbase.master</name>
<value>namenode00.host-shining.com,namenode01.host-shining.com</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/hadoop/apache-hadoop/zookeeper</value>
<description>Property from ZooKeeper's config zoo.cfg. The directory where the snapshot is stored.</description>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
<description>Property from ZooKeeper's config zoo.cfg. The port at which the clients will connect.</description>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>

</configuration>
hbase-env.sh (comment lines removed)
export JAVA_HOME=/usr/java/jdk1.8.0_45
export HADOOP_HOME=/home/hadoop/apache-hadoop/hadoop
export HBASE_HOME=/home/hadoop/apache-hadoop/hbase
export HBASE_HEAPSIZE=4096
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xms1024m -Xmx4096m"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xms1024m -Xmx4096m"
export HBASE_LOG_DIR=/home/hadoop/apache-hadoop/hbase/logs
export HBASE_MANAGES_ZK=false
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:/usr/lib64
export HBASE_LIBRARY_PATH=$HADOOP_HOME/lib/native:/usr/lib64
export CLASSPATH=$CLASSPATH:$HBASE_LIBRARY_PATH
regionservers file
datanode03.host-shining.com
datanode04.host-shining.com
datanode05.host-shining.com
datanode06.host-shining.com
Notes
In hbase-env.sh, change
#export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
#export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
to
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xms1024m -Xmx1024m"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xms1024m -Xmx1024m"

// With JDK 1.8 the PermSize/MaxPermSize options no longer exist, so use -Xms/-Xmx instead.

Install Hive

Create the database and grant privileges
mysql -uroot -p
create database shininghadoop;

GRANT ALL PRIVILEGES ON shininghadoop.* TO 'shininghadoop'@"192.168.77.158" IDENTIFIED BY 'shininghadoop' WITH GRANT OPTION;
GRANT ALL PRIVILEGES ON shininghadoop.* TO 'shininghadoop'@"namenode00.host-shining.com" IDENTIFIED BY 'shininghadoop' WITH GRANT OPTION;
...... repeat for the IP address and hostname of every Hadoop node

FLUSH PRIVILEGES;
Import the Hive metastore schema
tar -zxvf apache-hive-2.1.0-bin.tar.gz && ln -s apache-hive-2.1.0-bin hive
Create the metastore schema:
cd $HIVE_HOME/scripts/metastore/upgrade/mysql/
mysql -h<database-host> -ushininghadoop -p
use shininghadoop;
source hive-schema-2.1.0.mysql.sql;
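Hive 2.1 also ships a schematool that can initialize and verify the same schema; a sketch, assuming the javax.jdo.option.Connection* values in hive-site.xml below are already filled in:

$HIVE_HOME/bin/schematool -dbType mysql -initSchema    # only if the schema was not loaded via source above
$HIVE_HOME/bin/schematool -dbType mysql -info          # prints the schema version for verification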
Add the MySQL JDBC jar
Hive needs the MySQL JDBC jar:
cd $HIVE_HOME/lib
place mysql-connector-java-5.1.35.jar in this directory, then
ln -s mysql-connector-java-5.1.35.jar mysql-connector-java.jar
hive-site.xml (adjust to your environment; production uses the default file)
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.53.101:3306/testhadoop</value>
<description>the URL of the MySQL database</description>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>

<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>testhadoop</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>testhadoop</value>
</property>

<property>
<name>datanucleus.autoCreateSchema</name>
<value>false</value>
</property>

<property>
<name>datanucleus.fixedDatastore</name>
<value>true</value>
</property>

<property>
<name>datanucleus.autoStartMechanism</name>
<value>SchemaTable</value>
</property>

<property>
<name>hive.metastore.schema.verification</name>
<value>true</value>
</property>

<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
</property>

<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>

<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider</value>
<description>The Hive client authorization manager class name.
The user defined authorization class should implement interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider.
</description>
</property>

<property>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.DefaultHiveMetastoreAuthorizationProvider</value>
<description>authorization manager class name to be used in the metastore for authorization.
The user defined authorization class should implement interface org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider.
</description>
</property>

<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator</value>
<description>hive client authenticator manager class name.
The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider.</description>
</property>

<property>
<name>hive.security.metastore.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
<description>authenticator manager class name to be used in the metastore for authentication.
The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider.</description>
</property>

<property>
<name>hive.security.authorization.createtable.group.grants</name>
<value></value>
<description>the privileges automatically granted to some groups whenever a table gets created.
An example like "groupX,groupY:select;groupZ:create" will grant select privilege to groupX and groupY,
and grant create privilege to groupZ whenever a new table created.</description>
</property>

<property>
<name>hive.security.authorization.createtable.role.grants</name>
<value></value>
<description>the privileges automatically granted to some roles whenever a table gets created.
An example like "roleX,roleY:select;roleZ:create" will grant select privilege to roleX and roleY,
and grant create privilege to roleZ whenever a new table created.</description>
</property>

<property>
<name>hive.security.authorization.createtable.owner.grants</name>
<value>ALL</value>
<description>the privileges automatically granted to the owner whenever a table gets created.
An example like "select,drop" will grant select and drop privilege to the owner of the table</description>
</property>

<property>
<name>hive.auto.convert.join</name>
<value>false</value>
</property>
hive-env.sh
export HADOOP_HEAPSIZE=2048
export HIVE_CONF_DIR=/home/hadoop/apache-hadoop/hive/conf
export HIVE_AUX_JARS_PATH=/home/hadoop/apache-hadoop/hive/lib
export HADOOP_PREFIX=/home/hadoop/apache-hadoop/hadoop
export HADOOP_LIBEXEC_DIR=/home/hadoop/apache-hadoop/hadoop/libexec
export HADOOP_CONF_DIR=/home/hadoop/apache-hadoop/hadoop/etc/hadoop
export HADOOP_COMMON_HOME=/home/hadoop/apache-hadoop/hadoop
export HADOOP_HDFS_HOME=/home/hadoop/apache-hadoop/hadoop
export HADOOP_YARN_HOME=/home/hadoop/apache-hadoop/hadoop
export HADOOP_MAPRED_HOME=/home/hadoop/apache-hadoop/hadoop
Start the Hive metastore and HiveServer2
mkdir $HIVE_HOME/hive-logs
Create nohup.sh:
#!/bin/bash

nohup hive --service metastore > /home/hadoop/apache-hadoop/hive/hive-logs/metastore.log 2>&1 &
echo $! > /home/hadoop/apache-hadoop/hive/hive-logs/metastore.pid
nohup hive --service hiveserver2 > /home/hadoop/apache-hadoop/hive/hive-logs/hiveserver2.log 2>&1 &
echo $! > /home/hadoop/apache-hadoop/hive/hive-logs/hiveserver2.pid
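A connectivity check once both services are up (assuming HiveServer2 runs on this machine with the default port 10000):

beeline -u jdbc:hive2://localhost:10000 -n hadoop -e "show databases;"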
Start Hadoop
chmod 755 -R /home/hadoop/apache-hadoop
hdfs zkfc -formatZK (format the HA state in ZooKeeper)
hadoop-daemon.sh start journalnode (start the journalnodes; run on every journalnode host)
hdfs namenode -format (format the namenode metadata; run on the first namenode)
hadoop-daemon.sh start namenode
hdfs namenode -bootstrapStandby (run on the standby namenode, then start it with hadoop-daemon.sh start namenode)
If the logs above show no errors:
stop-dfs.sh
start-dfs.sh
start-yarn.sh (run on the resourcemanager machine)
mr-jobhistory-daemon.sh start historyserver (run on namenode01)
start-hbase.sh
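A few checks after startup to confirm HA and the worker daemons came up (service ids as configured above):

hdfs haadmin -getServiceState nn1      # one namenode should be active, the other standby
hdfs haadmin -getServiceState nn2
hdfs dfsadmin -report | head -n 30     # all datanodes should be listed as live
yarn rmadmin -getServiceState rm1      # resourcemanager HA state
yarn node -list                        # all nodemanagers should register
jps                                    # per-host process list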
Install Spark
wget http://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.tgz    (install on every machine)
tar -zxvf scala-2.11.8.tgz && ln -s scala-2.11.8 scala

wget http://apache.fayea.com/mahout/0.12.2/apache-mahout-distribution-0.12.2.tar.gz
tar -zxvf apache-mahout-distribution-0.12.2.tar.gz
ln -s apache-mahout-distribution-0.12.2 mahout
cp ~/apache-hadoop/hadoop/share/hadoop/common/lib/hadoop-lzo-0.4.20-SNAPSHOT.jar ~/apache-hadoop/mahout/lib/

wget http://mirrors.hust.edu.cn/apache/spark/spark-1.6.2/spark-1.6.2.tgz
tar -zxvf spark-1.6.2.tgz && ln -s spark-1.6.2 spark
cd spark/conf
spark-defaults.conf
spark.master                     spark://namenode00.host-shining.com:7077
spark.eventLog.enabled true
spark.eventLog.dir hdfs://shininghadoop/spark
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 5g
spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark-env.sh
export SPARK_WORKER_MEMORY=5G
export SPARK_MEM=${SPARK_MEM:-5g}
JAVA_OPTS="$OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
export JAVA_HOME=/usr/java/jdk1.8.0_45
export HADOOP_HOME=/home/hadoop/apache-hadoop/hadoop
export HBASE_HOME=/home/hadoop/apache-hadoop/hbase
export HIVE_HOME=/home/hadoop/apache-hadoop/hive
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export SPARK_HOME=/home/hadoop/apache-hadoop/spark
export SCALA_HOME=/home/hadoop/apache-hadoop/scala
export classpath=$JAVA_HOME/lib/dt.jar:$HBASE_HOME/lib:$MAHOUT_HOME/lib:$PIG_HOME/lib:$HIVE_HOME/lib:$JAVA_HOME/lib/tools.jar:$HADOOP_CONF_DIR:$SPARK_HOME/lib:${HADOOP_HOME}/lib
export HADOOP_CLASSPATH=$JAVA_HOME/lib/dt.jar:$HBASE_HOME/lib/*:$MAHOUT_HOME/lib/*:$PIG_HOME/lib:$HIVE_HOME/lib/*:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/*:$HADOOP_CONF_DIR:$SPARK_HOME/lib/*:${HADOOP_HOME}/lib/*:$HADOOP_CLASSPATH:
export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/$OS_ARCH/server:${HADOOP_HOME}/c++/Linux-$OS_ARCH-$OS_BIT/lib:/usr/local/lib:/usr/lib:${PBS_HOME}/lib:/usr/lib64
slaves file (Spark worker nodes)
datanode03.host-shining.com
datanode04.host-shining.com
datanode05.host-shining.com
datanode06.host-shining.com
Start Spark
cd $SPARK_HOME/sbin
./start-all.sh
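A quick smoke test (assuming the spark-defaults.conf above is in place so the standalone master is used):

$SPARK_HOME/bin/run-example SparkPi 10    # should end with a line like "Pi is roughly 3.14..."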
Check status

NameNode UI: http://namenode00.host-shining.com:50070
Spark UI: http://namenode00.host-shining.com:8080
HBase UI: http://namenode00.host-shining.com:16010
YARN UI: http://namenode01.host-shining.com:8088
JobHistory UI: http://namenode01.host-shining.com:19888


Thanks for your support!