Building a Hadoop Cluster on Armbian

armbianmonitor -m
htop

Set a Static IP

nano /etc/network/interfaces
allow-hotplug eth0
no-auto-down eth0
#iface eth0 inet dhcp
iface eth0 inet static
address 172.20.20.94
netmask 255.255.254.0
gateway 172.20.20.1
dns-nameservers 172.20.20.1 8.8.8.8
systemctl restart networking
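
Verify that the static address is active after the restart (assuming the interface is eth0, as above):

ip addr show eth0
ping -c 3 172.20.20.1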

Remote Desktop

apt-get install xrdp xorgxrdp -y
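
Optionally, assuming systemd manages xrdp on this image, enable the service so it starts on boot and confirm it is running:

systemctl enable --now xrdp
systemctl status xrdp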

Standalone Test

cd /opt/hadoop/
mkdir input
cp etc/hadoop/*.xml input/
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar grep input output 'dfs[a-z.]+'
cat output/*
rm -rf output/
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar wordcount input output
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar pi 3 10000000

Fully Distributed Setup

apt-get install ntpdate -y
ntpdate ntp.aliyun.com
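
ntpdate performs a one-shot sync. As a sketch, a cron entry can repeat it hourly (this appends to the current user's crontab; adjust the path if ntpdate lives elsewhere):

(crontab -l 2>/dev/null; echo "0 * * * * /usr/sbin/ntpdate ntp.aliyun.com") | crontab -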
hostnamectl set-hostname hd01
nano /etc/hosts
172.20.20.94  hd01
172.20.20.92  hd02

Passwordless SSH

ssh-keygen
ssh-copy-id localhost
ssh-copy-id hd02
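
Both commands below should print a hostname without asking for a password if key-based login is working:

ssh localhost hostname
ssh hd02 hostname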

Install Java

apt install openjdk-8-jre-headless openjdk-8-jdk-headless -y
java -version
jps
whereis java
echo $JAVA_HOME
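
If echo $JAVA_HOME prints nothing at this point (it is only exported in the next step), the JDK path can be resolved from the java binary; stripping the trailing /jre/bin/java or /bin/java from the output gives the value to use for JAVA_HOME:

readlink -f $(which java)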

Configure Environment Variables

nano /etc/profile
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
export HADOOP_HOME=/opt/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
hadoop version

Copy the environment variables to the other node. The profile can only be reused when both machines share the same architecture and word size.

scp /etc/profile hd02:/etc/profile
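
A quick sanity check, assuming the JDK is already installed on hd02 (bash -lc starts a login shell, which sources /etc/profile):

ssh hd02 "bash -lc 'java -version'"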

Copy the hosts file

scp /etc/hosts hd02:/etc/
Then create the NameNode and DataNode directories, matching the paths configured in hdfs-site.xml below:

cd /opt/hadoop/
mkdir -p dfs/name dfs/data

Edit the six configuration files

1. nano etc/hadoop/hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64
export HADOOP_HOME=/opt/hadoop

2. nano etc/hadoop/core-site.xml

<configuration>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/opt/hadoop/dfs/tmp</value>
                <description>Abase for other temporary directories.</description>
        </property>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://hd01:9000</value>
        </property>
</configuration>
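
Once the configuration files are edited, hdfs getconf confirms that Hadoop actually picks the value up:

hdfs getconf -confKey fs.defaultFS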

3. nano etc/hadoop/hdfs-site.xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/dfs/name</value>
    </property>
 
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/dfs/data</value>
    </property>
 
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>

4. vim etc/hadoop/yarn-site.xml

<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hd01</value>
    </property>
 
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>

5. vim etc/hadoop/mapred-site.xml

The Hadoop 1.x property mapred.job.tracker has no effect under YARN in Hadoop 3 and is omitted here.

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
</configuration>

6. nano etc/hadoop/workers

hd01
hd02

Edit four startup scripts so the cluster can be started as the root user

vi sbin/start-dfs.sh
vi sbin/stop-dfs.sh

Add on line 2, right after the shebang:

HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi sbin/start-yarn.sh
vi sbin/stop-yarn.sh

Add on line 2, right after the shebang:

YARN_RESOURCEMANAGER_USER=root
HDFS_DATANODE_SECURE_USER=yarn
YARN_NODEMANAGER_USER=root
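
As an alternative sketch (not part of the original steps): the same user variables can be exported once in etc/hadoop/hadoop-env.sh, which the start/stop scripts source, instead of patching all four of them:

export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root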

Copy the entire Hadoop directory

scp -r /opt/hadoop/ hd02:/opt
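
Verify the copy by invoking Hadoop directly from the destination path:

ssh hd02 /opt/hadoop/bin/hadoop version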

Format the NameNode on the master node; it only needs to be done once.

hdfs namenode -format
jps
start-all.sh
stop-all.sh
hdfs --daemon start|stop namenode|datanode
netstat -antup | grep java
netstat -antup | grep 9870
netstat -antup | grep 8088
jps
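
With both hosts listed in workers and the ResourceManager on hd01, jps on hd01 should show NameNode, SecondaryNameNode, DataNode, ResourceManager and NodeManager, while hd02 should show only DataNode and NodeManager (this assumes jps is on hd02's non-interactive PATH):

ssh hd02 jps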

If the DataNode fails to start, the NameNode was probably formatted more than once, leaving mismatched clusterIDs. Compare the two VERSION files and make the clusterIDs match:

cat dfs/name/current/VERSION
nano dfs/data/current/VERSION
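
Instead of editing the file by hand, a hedged one-liner can copy the NameNode's clusterID into the DataNode's VERSION file (run from /opt/hadoop, matching the directories configured above):

CID=$(grep '^clusterID=' dfs/name/current/VERSION | cut -d= -f2)
sed -i "s/^clusterID=.*/clusterID=${CID}/" dfs/data/current/VERSION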

Alternatively, delete the data directory on every DataNode; note that any files stored in HDFS will then be lost.

rm -rf /opt/hadoop/dfs/data/*

Common commands and the grep/wordcount tests

hadoop fs -mkdir /input
hadoop fs -put etc/hadoop/*.xml /input
hadoop fs -ls /input
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar grep /input /output 'dfs[a-z.]+'
hadoop fs -cat /output/*
hadoop fs -rm -r /output
hadoop fs -ls /
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar wordcount /input /output
hadoop fs -cat /output/*

Estimating pi

hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar pi 10 100
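
The first argument (10) is the number of map tasks and the second (100) the number of samples each map computes; more samples give a tighter estimate at the cost of runtime, e.g.:

hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar pi 10 1000000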

Last edited: 2021/01/16 by admin
