【Hadoop】Hadoop安装伪分布配置
1创建Hadoop用户
su
useradd -m hadoop -s /bin/bash 添加Hadoop用户 设置shell为bash
passwd hadoop 设置密码
2添加权限
visudo
添加 hadoop ALL=(ALL) ALL
3SSH及免密登陆
rpm -qa | grep ssh # 查询所有安装组件并且只显示ssh相关组件
//如未安装执行
sudo yum install openssh-clients #安装ssh客户端
sudo yum install openssh-server#安装ssh服务端
ssh localhost #ssh 测试登陆到本机
//生成**,并将**加入到授权
exit 退出登录
cd ~/.ssh/
ssh-****** -t rsa 回车 回车
cat id_rsa.pub >> authorized_keys #重定向将id_rsa.pub结果加入到到authorized_keys中
chmod 600 ./authorized_keys #更改权限
4未安装Java环境则通过yun进行环境安装
sudo yum install java-1.7.0-openjdk java-1.7.0-openjdk-devel
vim ~/.bashrc #编辑bashrc
添加export JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk
source ~/.bashrc
Java -version 查看Java版本
5Hadoop安装
下载Hadoop到下载目录
sudo tar -zxf ~/下载/hadoop-2.6.5.tar.gz -C /usr/local #解压Hadoop到/usr/local
cd /usr/local/
sudo mv ./hadoop-2.6.5/ ./hadoop 重命名Hadoop目录
sudo chown -R hadoop:hadoop ./hadoop 增加权限
//检查
cd hadoop 移动到Hadoop目录
./bin/hadoop version 查看Hadoop版本
6运行例子 grep查找
cd /usr/local/hadoop
mkdir ./input
cp ./etc/hadoop/*.xml ./input
./bin/hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep ./input ./output 'dfs[a-z.]+'
cat ./output/*
删除output
rm -r ./output
7伪分布式配置
gedit ~/.bashrc
添加一下到bashrc中
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
//使配置生效
source ~/.bashrc
//配置core-site.xml
gedit ./etc/hadoop/core-site.xml
添加
- <configuration>
- <property>
- <name>hadoop.tmp.dir</name>
- <value>file:/usr/local/hadoop/tmp</value> rm -r ./output><!--临时文件目录-->
- <description>Abase for other temporary directories.</description>
- </property>
- <property>
- <name>fs.defaultFS</name>
- <value>hdfs://localhost:9000</value>
- </property>
- </configuration>
//配置 hdfs-site.xml
- <configuration>
- <property>
- <name>dfs.replication</name>
- <value>1</value> <!--指定副本数量-->
- </property>
- <property>
- <name>dfs.namenode.name.dir</name>
- <value>file:/usr/local/hadoop/tmp/dfs/name</value> <!--faimage存储目录-->
- </property>
- <property>
- <name>dfs.datanode.data.dir</name>
- <value>file:/usr/local/hadoop/tmp/dfs/data</value> <!--数据块存储目录-->
- </property>
- </configuration>
./bin/hdfs namenode -format rm -r ./output rm -r ./output #NameNode格式化
开启 名称节点和数据节点。
./sbin/start-dfs.sh
//浏览器访问 Web 界面 http://localhost:50070 查看 NameNode 和 Datanode 信息
./sbin/stop-dfs.sh #关闭
8伪分布式实例运行
./bin/hdfs dfs -mkdir -p /user/hadoop #创建HDFS用户目录
9 yarn 启动
mv ./etc/hadoop/mapred-site.xml.template ./etc/hadoop/mapred-site.xml #重命名xml文件、
gedit ./etc/hadoop/mapred-site.xml #打开xml文件添加以下
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
修改yarn-site.xml 添加
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
启动yarn
./sbin/start-yarn.sh
./sbin/mr-jobhistory-daemon.sh start historyserver #开启历史服务