Hadoop 2.9.2 HA安装
Hadoop 2.9.2 HA安装,命令顺序修正
关于hadoop 的几种安装方式,以及理论,这里已经说的很清楚。
但其中Hadoop HA 的安装,命令顺序有所不对,以及不全,本篇文章以楼主亲身经历,来补充完整,使刚开始的同学可以顺利进行安装。
各种配置文件在文章末尾有,话不多说,开始。
HA集群 启动顺序,无特殊说明的命令都在主节点上执行
1 先启动各个节点的 zookeeper。
bin/zkServer.sh start
2 启动各个节点的 journalnode
sbin/hadoop-daemon.sh start journalnode
3 关闭防火墙
4 格式化 第一台namenode
bin/hdfs namenode -format
5 第一台启动namenode
sbin/hadoop-daemon.sh start namenode
6 在第二台namenode上
bin/hdfs namenode -bootstrapStandby
sbin/hadoop-daemon.sh start namenode
7 强制将一台namenode切换为Active状态(强制手动切换是不好的行为,仅用于此处验证集群)
bin/hdfs haadmin -transitionToActive -forcemanual nn1
(此时可以打开web页面查看集群信息了)
8 关闭现有hadoop进程
sbin/stop-dfs.sh
9 创建一个zNode
bin/hdfs zkfc -formatZK
10 启动hadoop进程
sbin/start-dfs.sh
11 配置mapred-site.xml,具体各文件配置信息在文章末尾(此步骤只修改配置文件并分发到各节点,无需执行命令)
12 启动yarn
sbin/start-yarn.sh
13 从节点启动RM
./sbin/yarn-daemon.sh start resourcemanager
14 执行wordcount程序
./bin/hdfs dfs -mkdir -p /input
./bin/hdfs dfs -put README.txt /input
./bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar wordcount /input/README.txt /output
15 启动JobHistoryServer,必须在主节点启动
sbin/mr-jobhistory-daemon.sh start historyserver
各配置文件
core-site.xml
<configuration>
  <!-- Default filesystem URI; in HA mode this is the nameservice id, not a single host. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://ns1</value>
  </property>
  <!-- Base directory for Hadoop temporary/working files. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/modules/ha/hadoop-2.9.2/data/tmp</value>
  </property>
  <!-- ZooKeeper quorum used by the ZKFC for automatic NameNode failover. -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>elink1:2181,elink2:2181,elink3:2181</value>
  </property>
</configuration>
hdfs-site.xml
<configuration>
  <!-- Logical nameservice id for the NameNode HA pair. -->
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
  </property>
  <!-- The NameNode ids that make up nameservice ns1. -->
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>
  <!-- RPC address/port of NameNode nn1 (used by clients and DataNodes). -->
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>elink1:8020</value>
  </property>
  <!-- RPC address/port of NameNode nn2. -->
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>elink2:8020</value>
  </property>
  <!-- HTTP (web UI) address of NameNode nn1. -->
  <property>
    <name>dfs.namenode.http-address.ns1.nn1</name>
    <value>elink1:50070</value>
  </property>
  <!-- HTTP (web UI) address of NameNode nn2. -->
  <property>
    <name>dfs.namenode.http-address.ns1.nn2</name>
    <value>elink2:50070</value>
  </property>
  <!-- JournalNode quorum through which the NameNodes share edit logs. -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://elink1:8485;elink2:8485;elink3:8485/ns1</value>
  </property>
  <!-- Local directory where each JournalNode stores its edit logs. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/modules/ha/hadoop-2.9.2/tmp/data/dfs/jn</value>
  </property>
  <!-- Proxy class clients use to find the currently active NameNode. -->
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fencing method run during failover so only one NameNode can remain active. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- SSH private key used by the sshfence method. -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!-- Enable ZKFC-driven automatic failover. -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
</configuration>
yarn-site.xml
<configuration>
  <!-- Site specific YARN configuration properties -->
  <!-- Auxiliary service NodeManagers run to serve MapReduce shuffle data. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Aggregate container logs onto HDFS after applications finish. -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- How long (seconds) to keep aggregated logs. NOTE(review): 106800s is
       ~29.7 hours; 86400 (one day) is the usual choice - possibly a typo, confirm. -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>106800</value>
  </property>
  <!-- Enable ResourceManager HA. -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <!-- Id for this ResourceManager HA cluster. -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <!-- The ResourceManager ids that make up the HA pair. -->
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm12,rm13</value>
  </property>
  <!-- Host running ResourceManager rm12. -->
  <property>
    <name>yarn.resourcemanager.hostname.rm12</name>
    <value>elink1</value>
  </property>
  <!-- Host running ResourceManager rm13. -->
  <property>
    <name>yarn.resourcemanager.hostname.rm13</name>
    <value>elink2</value>
  </property>
  <!-- Address clients use to submit applications to rm12. -->
  <property>
    <name>yarn.resourcemanager.address.rm12</name>
    <value>elink1:8032</value>
  </property>
  <!-- Scheduler address exposed to ApplicationMasters for requesting/releasing resources. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm12</name>
    <value>elink1:8030</value>
  </property>
  <!-- HTTP address of the rm12 web UI for viewing cluster information. -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm12</name>
    <value>elink1:8088</value>
  </property>
  <!-- Address NodeManagers use to heartbeat/exchange information with rm12. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm12</name>
    <value>elink1:8031</value>
  </property>
  <!-- Address administrators use to send admin commands to rm12. -->
  <property>
    <name>yarn.resourcemanager.admin.address.rm12</name>
    <value>elink1:8033</value>
  </property>
  <!-- HA admin service address for rm12. -->
  <property>
    <name>yarn.resourcemanager.ha.admin.address.rm12</name>
    <value>elink1:23142</value>
  </property>
  <!-- Same set of endpoints for rm13. -->
  <property>
    <name>yarn.resourcemanager.address.rm13</name>
    <value>elink2:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm13</name>
    <value>elink2:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm13</name>
    <value>elink2:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm13</name>
    <value>elink2:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm13</name>
    <value>elink2:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.admin.address.rm13</name>
    <value>elink2:23142</value>
  </property>
  <!-- ZooKeeper ensemble used by ResourceManager HA. -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>elink1:2181,elink2:2181,elink3:2181</value>
  </property>
  <!-- Allow the ResourceManager to recover application state after restart/failover. -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- Persist ResourceManager state in ZooKeeper (required for HA recovery). -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
</configuration>
mapred-site.xml
查看对应的classpath的值
yarn classpath
<configuration>
  <!-- Run MapReduce jobs on YARN rather than the classic/local framework. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>elink1:10020</value>
    <description>MapReduce JobHistory Server IPC host:port</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>elink1:19888</value>
    <description>MapReduce JobHistory Server Web UI host:port</description>
  </property>
  <!-- Classpath for MapReduce applications; taken verbatim from the output of
       `yarn classpath` on this installation. -->
  <property>
    <name>mapreduce.application.classpath</name>
    <value>
/opt/modules/ha/hadoop-2.9.2/etc/hadoop:/opt/modules/ha/hadoop-2.9.2/etc/hadoop:/opt/modules/ha/hadoop-2.9.2/etc/hadoop:/opt/modules/ha/hadoop-2.9.2/share/hadoop/common/lib/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/common/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/hdfs:/opt/modules/ha/hadoop-2.9.2/share/hadoop/hdfs/lib/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/hdfs/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/yarn:/opt/modules/ha/hadoop-2.9.2/share/hadoop/yarn/lib/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/yarn/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/mapreduce/lib/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/mapreduce/*:/opt/modules/ha/hadoop-2.9.2/contrib/capacity-scheduler/*.jar:/opt/modules/ha/hadoop-2.9.2/share/hadoop/yarn/*:/opt/modules/ha/hadoop-2.9.2/share/hadoop/yarn/lib/*
    </value>
  </property>
</configuration>
其他补充
配置slaves文件,配置 hadoop-env.sh、mapred-env.sh、yarn-env.sh文件的JAVA_HOME参数