
spark-3.3.2 (Standalone) Cluster Installation - Big Data Learning Series (3)
spark-3.3.2 (Standalone) Cluster Installation
Prerequisite: cluster planning
| Machine info | k8s-master | k8s-node1 | k8s-node2 |
| --- | --- | --- | --- |
| External IP | 106.15.186.55 | 139.196.15.28 | 47.101.63.122 |
| Internal IP | 172.25.73.65 | 172.25.73.64 | 172.25.73.66 |
| Role | master, worker1 | worker2 | worker3 |
step1 Pre-installation preparation
- Install Scala
Download a Scala release from the official site (The Scala Programming Language)
Link: https://pan.baidu.com/s/1-GAeyyDOPjhsWhIp_VV7yg?pwd=3fws Extraction code: 3fws
1.1 On every machine in the cluster (run the following on all of them!!!)
#Create the installation directories
mkdir -p /home/install/scala
mkdir -p /home/module/scala
#The final installation directory will be /home/module/scala/scala-2.12.17/
#Append the following to /etc/profile
echo "export SCALA_HOME=/home/module/scala/scala-2.12.17" >> /etc/profile
echo "export PATH=\$PATH:\${SCALA_HOME}/bin" >> /etc/profile
#Reload the profile so the new variables take effect
source /etc/profile
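To make sure the variables were actually picked up, a quick check like the following can help (a minimal sketch; run it after the source command above, or in a fresh shell):
#Confirm the Scala environment variables are visible in the current shell
echo $SCALA_HOME
echo $PATH | tr ':' '\n' | grep -i scala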
1.2 Switch to the k8s-master machine (install and distribute the environment)
cd /home/install/scala
#Upload scala-2.12.17.tgz to this directory
#Unpack the archive into the installation directory
tar -xvf /home/install/scala/scala-2.12.17.tgz -C /home/module/scala/
#Check that the installation works
scala -version
#The final installation directory is /home/module/scala/scala-2.12.17/; distribute it to the other machines
#Copy to k8s-node1
scp -r /home/module/scala/scala-2.12.17 root@k8s-node1:/home/module/scala/
#Copy to k8s-node2
scp -r /home/module/scala/scala-2.12.17 root@k8s-node2:/home/module/scala/
1.3 Switch to k8s-node1 and k8s-node2 and verify the installation
#Check that the installation works
scala -version
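If the distribution was copied correctly, the command should report version 2.12.17, roughly like the line below (the copyright text may differ slightly between builds):
Scala code runner version 2.12.17 -- Copyright 2002-2022, LAMP/EPFL and Lightbend, Inc.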
step2 Install the Spark environment
1. Download the Spark package
From the official site (Apache Spark™ - Unified Engine for large-scale data analytics)
download the pre-built spark-3.3.2-bin-hadoop3.tgz
Link: https://pan.baidu.com/s/1y-yXZCQdmlssdMNMpFlqnw?pwd=dfac Extraction code: dfac
- Create the Spark installation directory and unpack the archive
2.1 Execute on k8s-master
#Create the installation directories
mkdir -p /home/install/spark
mkdir -p /home/module/spark
#Upload spark-3.3.2-bin-hadoop3.tgz to /home/install/spark
#Enter the installation directory
cd /home/install/spark
#Unpack the archive into the installation directory
tar -zxvf /home/install/spark/spark-3.3.2-bin-hadoop3.tgz -C /home/module/spark/
#Rename the unpacked directory; the final installation directory is /home/module/spark/spark-3.3.2
mv /home/module/spark/spark-3.3.2-bin-hadoop3 /home/module/spark/spark-3.3.2
#Create the Spark log directory on HDFS
#(make sure Hadoop is already running; if it is not, start it first with)
#start-all.sh
#Create the Spark event log directory on HDFS
hadoop fs -mkdir -p /spark/eventLog
#Grant full (777) permissions on the Spark event log directory
hadoop fs -chmod 777 /spark/eventLog
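A quick optional check that the event-log directory exists with the expected permissions (a small sketch):
#List the Spark directory on HDFS and inspect the permission bits
hadoop fs -ls /spark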
2.2 Execute on k8s-node1
#Create the installation directories
mkdir -p /home/install/spark
mkdir -p /home/module/spark
2.3 Execute on k8s-node2
#Create the installation directories
mkdir -p /home/install/spark
mkdir -p /home/module/spark
3. Modify the configuration files
Switch to k8s-master and edit the configuration files there
3.1 spark-env.sh
#Enter the conf directory
cd /home/module/spark/spark-3.3.2/conf
#Create the spark-env.sh file
cat > spark-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64/jre
export SCALA_HOME=/home/module/scala/scala-2.12.17
export HADOOP_HOME=/home/module/hadoop/hadoop-3.3.4
#Location of the Hadoop configuration files
export HADOOP_CONF_DIR=/home/module/hadoop/hadoop-3.3.4/etc/hadoop
#Spark master settings
#Host the Spark master binds to
export SPARK_MASTER_HOST=k8s-master
#Spark master RPC port
export SPARK_MASTER_PORT=7077
#Spark master web UI port
export SPARK_MASTER_WEBUI_PORT=8001
#CPU cores each worker may use
export SPARK_WORKER_CORES=2
#Memory each worker may use
export SPARK_WORKER_MEMORY=1G
#Worker port
export SPARK_WORKER_PORT=7078
#Event log location for Spark applications and the history server web UI port
export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://k8s-master:8020/spark/eventLog -Dspark.history.fs.cleaner.enabled=true -Dspark.history.ui.port=8002"
EOF
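The paths above (JAVA_HOME, SCALA_HOME, HADOOP_HOME) come from the earlier parts of this series; if your JDK or Hadoop lives elsewhere, adjust them. A quick sanity check, assuming these exact paths:
#Each of these should exist on k8s-master; a missing path will break start-all.sh later
ls -d /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.362.b08-1.el7_9.x86_64/jre \
      /home/module/scala/scala-2.12.17 \
      /home/module/hadoop/hadoop-3.3.4/etc/hadoop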
3.2 spark-defaults.conf
#Enter the conf directory
cd /home/module/spark/spark-3.3.2/conf
#Create the spark-defaults.conf file
cat > spark-defaults.conf << EOF
#spark://HOST:PORT points at the standalone cluster master
spark.master spark://k8s-master:7077
#Enable event logging
spark.eventLog.enabled true
#Where the event logs are stored
spark.eventLog.dir hdfs://k8s-master:8020/spark/eventLog
#Compress the event logs
spark.eventLog.compress true
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 1g
EOF
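Anything set in spark-defaults.conf can still be overridden per job at submit time with --conf, which takes precedence over the file for that one submission. A hedged sketch (the class and jar names are placeholders, not part of this install):
#--conf values override spark-defaults.conf for this submission only
/home/module/spark/spark-3.3.2/bin/spark-submit \
  --conf spark.driver.memory=2g \
  --conf spark.eventLog.compress=false \
  --class your.main.Class /path/to/your-app.jar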
3.3 workers
#Enter the conf directory
cd /home/module/spark/spark-3.3.2/conf
#Create the workers file
cat > workers << EOF
k8s-master
k8s-node1
k8s-node2
EOF
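Every hostname listed in workers must resolve from k8s-master (and be reachable over passwordless SSH), otherwise start-all.sh cannot bring the workers up. A quick check:
#Verify that each worker hostname resolves and answers
for h in k8s-master k8s-node1 k8s-node2; do ping -c 1 $h; done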
3.4 log4j2.properties
This change is optional.
It mainly lowers the root log level to warn so the logs do not get too noisy:
change rootLogger.level = info to rootLogger.level = warn (see the one-liner after the file below).
#Still in /home/module/spark/spark-3.3.2/conf, create the log4j2.properties file
cat > log4j2.properties << 'EOF'
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console
# In the pattern layout configuration below, we specify an explicit `%ex` conversion
# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
# class packaging information. That extra information can sometimes add a substantial
# performance overhead, so we disable it in our default logging config.
# For more information, see SPARK-39361.
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn
logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn
# Settings to quiet third party logs that are too verbose
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = error
logger.parquet2.name = parquet
logger.parquet2.level = error
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = error
# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral
EOF
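With the file in place, the optional info-to-warn change described above can be applied in one step (a small sketch using sed):
#Lower the root log level from info to warn
sed -i 's/rootLogger.level = info/rootLogger.level = warn/' /home/module/spark/spark-3.3.2/conf/log4j2.properties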
- Distribute the files
Execute on k8s-master
#Copy to k8s-node1
scp -r /home/module/spark/spark-3.3.2 root@k8s-node1:/home/module/spark/spark-3.3.2
#Copy to k8s-node2
scp -r /home/module/spark/spark-3.3.2 root@k8s-node2:/home/module/spark/spark-3.3.2
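An optional check that the distribution landed where the Spark scripts expect it (a sketch, assuming root SSH access as above):
#The conf directory should now exist on both worker nodes
ssh root@k8s-node1 ls /home/module/spark/spark-3.3.2/conf
ssh root@k8s-node2 ls /home/module/spark/spark-3.3.2/conf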
- Start Spark and verify
#Enter Spark's sbin directory
cd /home/module/spark/spark-3.3.2/sbin/
#Start Spark
/home/module/spark/spark-3.3.2/sbin/start-all.sh
#Stop Spark
#/home/module/spark/spark-3.3.2/sbin/stop-all.sh
#Start the Spark history server
/home/module/spark/spark-3.3.2/sbin/start-history-server.sh
#Stop the Spark history server
#/home/module/spark/spark-3.3.2/sbin/stop-history-server.sh
jps -m
PS: the commented-out stop commands above are how to shut the services down.
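On k8s-master, jps -m should show Master, Worker and HistoryServer processes; k8s-node1 and k8s-node2 only show a Worker. To confirm the cluster actually accepts work, the bundled SparkPi example can be submitted against the master (a sketch; the examples jar ships with the binary distribution, so the glob below should match it):
#Run the SparkPi example on the standalone cluster; it should finish and print an approximation of Pi
/home/module/spark/spark-3.3.2/bin/spark-submit \
  --master spark://k8s-master:7077 \
  --class org.apache.spark.examples.SparkPi \
  /home/module/spark/spark-3.3.2/examples/jars/spark-examples_*.jar 100
#Once finished, the application should also appear in the history server UI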
step3 Verify the Spark UIs
1. View the Spark web UI
2. View the Spark history server
The default port for the history server is 18080, but we changed it to 8002 in the configuration above.
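With the ports configured above, the two UIs are reachable in a browser at the following addresses (using the external IP of k8s-master from the planning table; make sure the ports are open in any firewall or security group):
#Spark master web UI (SPARK_MASTER_WEBUI_PORT)
http://106.15.186.55:8001
#Spark history server UI (spark.history.ui.port)
http://106.15.186.55:8002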