0: 设置系统登录相关
Master要执行
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
如果用root用户
sed -ri 's/^(PermitRootLogin).*$/\1 yes/' /etc/ssh/sshd_config
编辑/etc/hosts
127.0.0.1 localhost # 别把 spark1 放在这
192.168.100.25 spark1 # spark1 is Master
192.168.100.26 spark2
192.168.100.27 spark3
127.0.1.1 ubuntu
# The following lines are desirable for IPv6 capable hosts
::1 localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
如果把 spark1 放在/etc/hosts第一行,会发现在slave 有下面的错误
org.apache.hadoop.ipc.Client: Retrying connect to server: spark1/192.168.100.25:9000. Already tried 0 time(s)
然后在spark1 运行
ss -lnt
LISTEN 0 128 localhost:9000
会发现监听的是本地. 删除 hosts中的相关文本重新启动hadoop,解决问题
1: 安装java
可以直接apt-get
apt-get install python-software-properties -y
add-apt-repository ppa:webupd8team/java
apt-get update
apt-get install oracle-java7-installer
或者下载
wget http://download.oracle.com/otn-pub/java/jdk/7u80-b15/jdk-7u80-linux-x64.tar.gz
mkdir /usr/lib/jvm
tar xvf jdk-7u80-linux-x64.tar.gz
mv jdk1.7.0_80 /usr/lib/jvm
# 配置相关路径
update-alternatives --install "/usr/bin/java" "java" "/usr/lib/jvm/jdk1.7.0_80/bin/java" 1
update-alternatives --install "/usr/bin/javac" "javac" "/usr/lib/jvm/jdk1.7.0_80/bin/javac" 1
update-alternatives --install "/usr/bin/javaws" "javaws" "/usr/lib/jvm/jdk1.7.0_80/bin/javaws" 1
update-alternatives --config java
# 验证一下
java -version
javac -version
javaws -version
添加环境变量
cat >> /etc/profile <<'EOF'
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_80
export JRE_HOME=/usr/lib/jvm/jdk1.7.0_80/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
EOF
2: 安装 hadoop
tar xvf hadoop-2.7.3.tar.gz
mv hadoop-2.7.3 /usr/local/hadoop
cd /usr/local/hadoop
mkdir -p hdfs/{data,name,tmp}
添加环境变量
cat >> /etc/profile <<'EOF'
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
EOF
编辑 hadoop-env.sh 文件
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_80 # 只改了这一行
编辑 core-site.xml 文件
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://spark1:9000</value> </property> <property> <name>hadoop.tmp.dir</name> <value>/usr/local/hadoop/hdfs/tmp</value> </property> </configuration>
编辑hdfs-site.xml 文件
<configuration> <property> <name>dfs.namenode.name.dir</name> <value>/usr/local/hadoop/hdfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>/usr/local/hadoop/hdfs/data</value> </property> <property> <name>dfs.replication</name> <value>3</value> </property> </configuration>
编辑mapred-site.xml 文件
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration>
编辑yarn-site.xml 文件
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>spark1</value>
  </property>
  <!--property>
    别添加这个属性,添加了可能出现下面的错误:
    Problem binding to [spark1:0] java.net.BindException: Cannot assign requested address
    <name>yarn.nodemanager.hostname</name>
    <value>spark1</value>
  </property-->
</configuration>
https://hadoop.apache.org/docs/r2.7.3/
编辑masters 文件
echo spark1 > masters
编辑 slaves 文件
spark1
spark2
spark3
安装好后,使用rsync 把相关目录及/etc/profile同步过去即可
启动hadoop dfs
./sbin/start-dfs.sh
初始化文件系统
hadoop namenode -format
启动 yarn
./sbin/start-yarn.sh
检查spark1相关进程
root@spark1:/usr/local/spark/conf# jps
1699 NameNode
8856 Jps
2023 SecondaryNameNode
2344 NodeManager
1828 Datanode
2212 ResourceManager
spark2 spark3 也要有类似下面的进程
root@spark2:/tmp# jps
3238 Jps
1507 Datanode
1645 NodeManager
可以打开web页面查看
http://192.168.100.25:50070
测试hadoop
hadoop fs -mkdir /testin
hadoop fs -put ~/str.txt /testin
cd /usr/local/hadoop
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /testin/str.txt testout
结果如下:
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /testin/str.txt testout
17/02/24 11:20:59 INFO client.RMProxy: Connecting to ResourceManager at spark1/192.168.100.25:8032
17/02/24 11:21:01 INFO input.FileInputFormat: Total input paths to process : 1
17/02/24 11:21:01 INFO mapreduce.JobSubmitter: number of splits:1
17/02/24 11:21:02 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1487839487040_0002
17/02/24 11:21:06 INFO impl.YarnClientImpl: Submitted application application_1487839487040_0002
17/02/24 11:21:06 INFO mapreduce.Job: The url to track the job: http://spark1:8088/proxy/application_1487839487040_0002/
17/02/24 11:21:06 INFO mapreduce.Job: Running job: job_1487839487040_0002
17/02/24 11:21:28 INFO mapreduce.Job: Job job_1487839487040_0002 running in uber mode : false
17/02/24 11:21:28 INFO mapreduce.Job:  map 0% reduce 0%
17/02/24 11:22:00 INFO mapreduce.Job:  map 100% reduce 0%
17/02/24 11:22:15 INFO mapreduce.Job:  map 100% reduce 100%
17/02/24 11:22:17 INFO mapreduce.Job: Job job_1487839487040_0002 completed successfully
17/02/24 11:22:17 INFO mapreduce.Job: Counters: 49
    File System Counters
        FILE: Number of bytes read=212115
        FILE: Number of bytes written=661449
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=377966
        HDFS: Number of bytes written=154893
        HDFS: Number of read operations=6
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=2
    Job Counters
        Launched map tasks=1
        Launched reduce tasks=1
        Data-local map tasks=1
        Total time spent by all maps in occupied slots (ms)=23275
        Total time spent by all reduces in occupied slots (ms)=11670
        Total time spent by all map tasks (ms)=23275
        Total time spent by all reduce tasks (ms)=11670
        Total vcore-milliseconds taken by all map tasks=23275
        Total vcore-milliseconds taken by all reduce tasks=11670
        Total megabyte-milliseconds taken by all map tasks=23833600
        Total megabyte-milliseconds taken by all reduce tasks=11950080
    Map-Reduce Framework
        Map input records=1635
        Map output records=63958
        Map output bytes=633105
        Map output materialized bytes=212115
        Input split bytes=98
        Combine input records=63958
        Combine output records=14478
        Reduce input groups=14478
        Reduce shuffle bytes=212115
        Reduce input records=14478
        Reduce output records=14478
        Spilled Records=28956
        Shuffled Maps=1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=429
        cpu time spent (ms)=10770
        Physical memory (bytes) snapshot=455565312
        Virtual memory (bytes) snapshot=1391718400
        Total committed heap usage (bytes)=277348352
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=377868
    File Output Format Counters
        Bytes Written=154893
3: 安装 scala
tar xvf scala-2.11.8.tgz
mv scala-2.11.8 /usr/local/scala
添加环境变量
cat >> /etc/profile <<'EOF'
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin
EOF
测试
source /etc/profile
scala -version
Scala code runner version 2.11.8 -- Copyright 2002-2016, LAMP/EPFL
4: 安装 spark
tar xvf spark-2.1.0-bin-hadoop2.7.tgz
mv spark-2.1.0-bin-hadoop2.7 /usr/local/spark
添加环境变量
cat >> /etc/profile <<'EOF'
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
EOF
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native # 这一条不添加的话在运行 spark-shell 时会出现下面的错误
NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
编辑 spark-env.sh
SPARK_MASTER_HOST=spark1
HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
编辑 slaves
spark1
spark2
spark3
启动 spark
./sbin/start-all.sh
此时在spark1上运行jps应该如下,多了 Master 和 Worker
root@spark1:/usr/local/spark/conf# jps
1699 NameNode
8856 Jps
7774 Master
2023 SecondaryNameNode
7871 Worker
2344 NodeManager
1828 Datanode
2212 ResourceManager
spark2 和 spark3 则多了 Worker
root@spark2:/tmp# jps
3238 Jps
1507 Datanode
1645 NodeManager
3123 Worker
可以打开web页面查看
http://192.168.100.25:8080/
运行 spark-shell
root@spark1:/usr/local/spark/conf# spark-shell
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/02/24 11:55:46 WARN SparkContext: Support for Java 7 is deprecated as of Spark 2.0.0
17/02/24 11:56:17 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
Spark context Web UI available at http://192.168.100.25:4040
Spark context available as 'sc' (master = local[*], app id = local-1487908553475).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.1.0
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_80)
Type in expressions to have them evaluated.
Type :help for more information.

scala> :help
此时可以打开spark 查看
http://192.168.100.25:4040/environment/
至此完成.