准备hadoop镜像

具体参考 https://blog.csdn.net/qq_45744501/article/details/112175428
这里已经打包好部署的镜像了

# Pull the prebuilt Hadoop image that every manifest below references.
docker pull janle88/hadoop:latest

部署hadoop到k8s上

ConfigMap 这里不要使用ip,避免datanode连不上namenode,下边的配置有一个ConfigMap

| yaml配置 | 配置数量 | 配置明细 |
| --- | --- | --- |
| ConfigMap | 1 | kube-hadoop-conf |
| Service | 5 | hadoop-hdfs-master, hadoop-yarn-master, yarn-node-1, yarn-node-2, yarn-node-3 |
| Pod | 8 | hdfs-master, hadoop-datanode-1, hadoop-datanode-2, hadoop-datanode-3, yarn-master, yarn-node-1, yarn-node-2, yarn-node-3 |

# Shared settings injected into every Hadoop pod as environment variables.
# The values are Service names, not IP addresses, so that DataNodes can
# still find the NameNode after pods are rescheduled.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: default
  name: kube-hadoop-conf
data:
  # NOTE(review): "HDOOP" is misspelled, but every workload in this file
  # references the key by this exact name, so it is kept as is.
  HDOOP_YARN_MASTER: hadoop-yarn-master
  HDFS_MASTER_SERVICE: hadoop-hdfs-master
---
# NodePort Service in front of the pod labelled `app: hdfs-master`.
# Only the web UI port is pinned to a fixed node port (32007); the rpc port
# gets whatever node port the cluster allocates.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hdfs-master
spec:
  selector:
    app: hdfs-master
  type: NodePort
  ports:
    - port: 9000
      name: rpc
      targetPort: 9000
    - port: 50070
      name: http
      targetPort: 50070
      nodePort: 32007
---
# HDFS master pod. Its `app: hdfs-master` label is what the
# hadoop-hdfs-master Service selects on.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: hdfs-master
  name: hdfs-master
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: hdfs-master
      imagePullPolicy: IfNotPresent
      env:
        # Presumably read by the image's entrypoint to choose which Hadoop
        # daemon to start; confirm against the image.
        - name: HADOOP_NODE_TYPE
          value: namenode
        # Both master addresses come from the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # 9000 is exposed as "rpc" and 50070 as "http" by the Service.
        - containerPort: 9000
        - containerPort: 50070
---
# First of three standalone HDFS DataNode pods.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: hadoop-datanode-1
  name: hadoop-datanode-1
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: hadoop-datanode-1
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # NOTE(review): these are the same two ports the hdfs-master pod
        # declares; confirm which ports a DataNode in this image listens on.
        - containerPort: 9000
        - containerPort: 50070
---
# Second of three standalone HDFS DataNode pods.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: hadoop-datanode-2
  name: hadoop-datanode-2
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: hadoop-datanode-2
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # NOTE(review): these are the same two ports the hdfs-master pod
        # declares; confirm which ports a DataNode in this image listens on.
        - containerPort: 9000
        - containerPort: 50070
---
# Third of three standalone HDFS DataNode pods.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: hadoop-datanode-3
  name: hadoop-datanode-3
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: hadoop-datanode-3
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # NOTE(review): these are the same two ports the hdfs-master pod
        # declares; confirm which ports a DataNode in this image listens on.
        - containerPort: 9000
        - containerPort: 50070
---
# NodePort Service in front of the pod labelled `app: yarn-master`.
# Only the web UI (8088) is pinned to a fixed node port (32088); the other
# three ports receive cluster-allocated node ports.
apiVersion: v1
kind: Service
metadata:
  name: hadoop-yarn-master
spec:
  selector:
    app: yarn-master
  type: NodePort
  ports:
    # Port names are quoted so that they stay strings rather than integers.
    # targetPort is written out; it equals the default (same as `port`).
    - port: 8030
      name: "8030"
      targetPort: 8030
    - port: 8031
      name: "8031"
      targetPort: 8031
    - port: 8032
      name: "8032"
      targetPort: 8032
    - port: 8088
      name: http
      targetPort: 8088
      nodePort: 32088
---
# YARN master pod. Its `app: yarn-master` label is what the
# hadoop-yarn-master Service selects on.
apiVersion: v1
kind: Pod
metadata:
  name: yarn-master
  labels:
    app: yarn-master
spec:
  containers:
    - name: yarn-master
      image: janle88/hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        # Declare the ports the hadoop-yarn-master Service actually targets
        # (8030/8031/8032 and the 8088 web UI). The previous 9000/50070 were
        # copied from the HDFS master and did not match that Service.
        - containerPort: 8030
        - containerPort: 8031
        - containerPort: 8032
        - containerPort: 8088
      env:
        # Presumably read by the image's entrypoint to choose which Hadoop
        # daemon to start; confirm against the image.
        - name: HADOOP_NODE_TYPE
          value: resourceman
        # Both master addresses come from the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER
  restartPolicy: Always
---
# Headless Service (clusterIP: None) for the pod labelled `app: yarn-node-1`.
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-1
spec:
  selector:
    app: yarn-node-1
  clusterIP: None
  ports:
    - port: 8040
---
# Headless Service (clusterIP: None) for the pod labelled `app: yarn-node-2`.
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-2
spec:
  selector:
    app: yarn-node-2
  clusterIP: None
  ports:
    - port: 8040
---
# Headless Service (clusterIP: None) for the pod labelled `app: yarn-node-3`.
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-3
spec:
  selector:
    app: yarn-node-3
  clusterIP: None
  ports:
    - port: 8040
---
# First of three standalone YARN worker pods; selected by the headless
# Service of the same name through the `app: yarn-node-1` label.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: yarn-node-1
  name: yarn-node-1
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: yarn-node-1
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # Only 8040 is published by the matching Service.
        - containerPort: 8040
        - containerPort: 8041
        - containerPort: 8042
---
# Second of three standalone YARN worker pods; selected by the headless
# Service of the same name through the `app: yarn-node-2` label.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: yarn-node-2
  name: yarn-node-2
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: yarn-node-2
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # Only 8040 is published by the matching Service.
        - containerPort: 8040
        - containerPort: 8041
        - containerPort: 8042
---
# Third of three standalone YARN worker pods; selected by the headless
# Service of the same name through the `app: yarn-node-3` label.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: yarn-node-3
  name: yarn-node-3
spec:
  restartPolicy: Always
  containers:
    - image: janle88/hadoop:latest
      name: yarn-node-3
      imagePullPolicy: IfNotPresent
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        # Master addresses are resolved through the kube-hadoop-conf ConfigMap.
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              key: HDFS_MASTER_SERVICE
              name: kube-hadoop-conf
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              key: HDOOP_YARN_MASTER
              name: kube-hadoop-conf
      ports:
        # Only 8040 is published by the matching Service.
        - containerPort: 8040
        - containerPort: 8041
        - containerPort: 8042

创建hadoop集群

创建hadoop.yaml文件并启动。
执行命令

# Create every object defined in hadoop.yaml (ConfigMap, Services, Pods).
kubectl create -f hadoop.yaml

检查是否创建成功

# List the ConfigMaps and Services to confirm they were created.
kubectl get configmap -o wide
kubectl get svc -o wide

在这里插入图片描述
浏览器访问HDFS管理界面【http://IP地址:32007】
查看datanode节点
在这里插入图片描述

保证集群节点活跃

由于单独的 Pod 不太稳定(被删除或所在节点故障后不会自动重建),为了保证 Pod 一直处于运行状态,需要将 kind 类型从 Pod 换成 ReplicationController,
并在 spec: 配置那里添加 replicas: 1【保留一个副本】,同时添加如下 template 配置:

# If this block is missing, creating the object fails with:
#   spec.selector: Required value, spec.template: Required value
template:
  metadata:
    labels:
      name: yarn-master
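| yaml配置 | 配置数量 | 配置明细 |
| --- | --- | --- |
| ConfigMap | 1 | kube-hadoop-conf |
| Service | 3 | hadoop-hdfs-master, hadoop-yarn-master, yarn-node |
| Pod -> ReplicationController | 4(共 8 个 Pod 副本) | hdfs-master ×1, hadoop-datanode ×3, yarn-master ×1, yarn-node ×3(对应原来的 hdfs-master, hadoop-datanode-1, hadoop-datanode-2, hadoop-datanode-3, yarn-master, yarn-node-1, yarn-node-2, yarn-node-3) |
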
# ReplicationController that keeps one HDFS master pod running.
#
# The pod template carries two labels:
#   name: matched by this controller's own selector;
#   app:  matched by the hadoop-hdfs-master Service (selector
#         `app: hdfs-master`). Without it the Service has no endpoints and
#         neither the DataNodes nor the NodePort web UI can reach the pod.
apiVersion: v1
kind: ReplicationController
metadata:
  name: hdfs-master
  labels:
    name: hdfs-master
spec:
  replicas: 1
  selector:
    name: hdfs-master
  template:
    metadata:
      labels:
        name: hdfs-master
        app: hdfs-master
    spec:
      containers:
        - name: hdfs-master
          image: janle88/hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            # 9000 is exposed as "rpc" and 50070 as "http" by the Service.
            - containerPort: 9000
            - containerPort: 50070
          env:
            # Presumably read by the image's entrypoint to choose which
            # Hadoop daemon to start; confirm against the image.
            - name: HADOOP_NODE_TYPE
              value: namenode
            # Both master addresses come from the kube-hadoop-conf ConfigMap.
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER
      restartPolicy: Always
---
# ReplicationController that keeps three HDFS DataNode pods running,
# replacing the three individually defined datanode pods.
apiVersion: v1
kind: ReplicationController
metadata:
  labels:
    app: hadoop-datanode
  name: hadoop-datanode
spec:
  selector:
    name: hadoop-datanode
  replicas: 3
  template:
    metadata:
      labels:
        # Must match spec.selector above.
        name: hadoop-datanode
    spec:
      restartPolicy: Always
      containers:
        - image: janle88/hadoop:latest
          name: hadoop-datanode
          imagePullPolicy: IfNotPresent
          env:
            - name: HADOOP_NODE_TYPE
              value: datanode
            # Master addresses are resolved through the kube-hadoop-conf
            # ConfigMap.
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  key: HDFS_MASTER_SERVICE
                  name: kube-hadoop-conf
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  key: HDOOP_YARN_MASTER
                  name: kube-hadoop-conf
          ports:
            # NOTE(review): these are the same two ports the hdfs-master
            # declares; confirm which ports a DataNode in this image uses.
            - containerPort: 9000
            - containerPort: 50070
---
# ReplicationController that keeps one YARN master pod running.
#
# The pod template carries two labels:
#   name: matched by this controller's own selector;
#   app:  matched by the hadoop-yarn-master Service (selector
#         `app: yarn-master`). Without it the Service has no endpoints and
#         the NodePort web UI and the YARN workers cannot reach the pod.
apiVersion: v1
kind: ReplicationController
metadata:
  name: yarn-master
  labels:
    name: yarn-master
spec:
  replicas: 1
  selector:
    name: yarn-master
  template:
    metadata:
      labels:
        name: yarn-master
        app: yarn-master
    spec:
      containers:
        - name: yarn-master
          image: janle88/hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            # Declare the ports the hadoop-yarn-master Service targets
            # (8030/8031/8032 and the 8088 web UI). The previous 9000/50070
            # were copied from the HDFS master.
            - containerPort: 8030
            - containerPort: 8031
            - containerPort: 8032
            - containerPort: 8088
          env:
            # Presumably read by the image's entrypoint to choose which
            # Hadoop daemon to start; confirm against the image.
            - name: HADOOP_NODE_TYPE
              value: resourceman
            # Both master addresses come from the kube-hadoop-conf ConfigMap.
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER
      restartPolicy: Always
---
# ReplicationController that keeps three YARN worker pods running,
# replacing the three individually defined yarn-node pods. The pods are
# selected by the headless yarn-node Service through the `name` label.
apiVersion: v1
kind: ReplicationController
metadata:
  labels:
    name: yarn-node
  name: yarn-node
spec:
  selector:
    name: yarn-node
  replicas: 3
  template:
    metadata:
      labels:
        # Must match spec.selector above.
        name: yarn-node
    spec:
      restartPolicy: Always
      containers:
        - image: janle88/hadoop:latest
          name: yarn-node
          imagePullPolicy: IfNotPresent
          env:
            - name: HADOOP_NODE_TYPE
              value: yarnnode
            # Master addresses are resolved through the kube-hadoop-conf
            # ConfigMap.
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  key: HDFS_MASTER_SERVICE
                  name: kube-hadoop-conf
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  key: HDOOP_YARN_MASTER
                  name: kube-hadoop-conf
          ports:
            # Only 8040 is published by the yarn-node Service.
            - containerPort: 8040
            - containerPort: 8041
            - containerPort: 8042
      
---
# Headless Service (clusterIP: None) covering every pod labelled
# `name: yarn-node`, i.e. the replicas of the yarn-node controller.
apiVersion: v1
kind: Service
metadata:
  name: yarn-node
spec:
  selector:
    name: yarn-node
  clusterIP: None
  ports:
    - port: 8040

发布以后就可以访问hadoop了。
HDFS管理界面 http://10.10.1.22:32007
Yarn的管理界面 http://10.10.1.22:32088

运行测试

配置node访问的host

10.244.2.53     yarn-node-dvf7l
10.244.2.54     yarn-node-njfxh
10.244.1.54     yarn-node-2ll8v
10.244.1.53     yarn-master-vkhgz
10.244.2.52     hadoop-datanode-pvgzk
10.244.1.51     hadoop-datanode-4fr2g
10.244.1.52     hadoop-datanode-b94nb
10.244.2.51     hdfs-master-pgpt2

进入hadoop的namenode节点。
进入hadoop
创建一个文件系统目录。

# Locate the hadoop executable inside the container.
whereis hadoop
# List the HDFS root directory.
hadoop fs -ls /
cd /usr/local/hadoop
# Create the HDFS directories used by the test job.
hadoop fs -mkdir /janle
hadoop fs -mkdir /janleInput
# NOTE(review): the wordcount job further down writes to /janleOutput, and
# MapReduce normally refuses an output directory that already exists;
# confirm whether this directory should be created here.
hadoop fs -mkdir /janleOutput

在hdfs的管理界面查看创建的目录
在这里插入图片描述
启动一个简单的wordcount的MapReduce

# Create a local scratch directory and a sample text file to count words in.
mkdir file
vi test.txt 

输入下述的内容
在这里插入图片描述

# Upload the sample file into the HDFS directory the job reads from.
# The target must be /janleInput; a relative "hdfsInput" would land under
# the user's HDFS home directory, where the job never looks.
hadoop fs -put test.txt /janleInput
# Download the examples jar. The download is a zip archive, so unpack it
# to obtain hadoop-examples-1.2.1.jar (assumes unzip is available in the
# container; confirm).
wget http://www.java2s.com/Code/JarDownload/hadoop-examples/hadoop-examples-1.2.1.jar.zip
unzip hadoop-examples-1.2.1.jar.zip
# MapReduce aborts if the output directory already exists, so remove the
# empty /janleOutput created earlier before starting the job.
hadoop fs -rm -r -f /janleOutput
# Run the word count test.
hadoop jar hadoop-examples-1.2.1.jar wordcount /janleInput /janleOutput

总结:
不建议使用k8s部署hadoop集群。hadoop集群本身是基于yarn做资源调度的,而k8s的定位与yarn类似,也是一个资源调度的基础平台,两者功能重叠。另外k8s中的Pod大多是动态创建的,IP地址、主机名等都会随之变化(例如上面需要手工配置hosts),因此会带来很多不便。

Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐