flink on k8s
flink on k8s
·
# Image used to run the v2x job with Flink's Kubernetes application mode.
# It adds Hadoop/Hive client configuration, SSL material, Flink configuration
# and the job jar on top of a site-specific Flink base image.
FROM k8s-docker-registry-node:5000/flink:doris-hbase-elasticsearch-jedis

# Create every target directory in one layer instead of one layer per mkdir.
RUN mkdir -p \
      /etc/hadoop \
      /etc/hive \
      /opt/ssl/yunkong-pro-env \
      /opt/ssl/yunkong-test-env

# Trailing slashes mark each destination as a directory, so the result does
# not depend on the directory already existing when COPY runs.
COPY hadoop /etc/hadoop/
COPY hive /etc/hive/

# NOTE(review): trust stores / SSL material are baked into the image layers;
# confirm this is acceptable or mount them at runtime (e.g. a Kubernetes Secret).
COPY ssl/client.truststore.jks /opt/ssl/
COPY ssl/yunkong-test-ssl /opt/ssl/yunkong-test-env/
COPY ssl/yunkong-pro-ssl /opt/ssl/yunkong-pro-env/

COPY flink/conf /opt/flink/conf/

# The jar location must match the local:///opt/v2x-1.0-SNAPSHOT.jar URI that is
# passed to `flink run-application`.
COPY v2x-1.0-SNAPSHOT.jar /opt/
# Submit the job in Flink's native Kubernetes application mode. The job jar is
# read from inside the container image (local:// scheme), not from the client.
#
# The -Dexternal-resource.{limits,requests}.kubernetes.{cpu,memory} flags were
# removed: they are not Flink configuration options (the external resource
# framework only reads external-resource.<name>.* for names listed in
# `external-resources`, which is not set here), so they had no effect.
# Pod CPU comes from kubernetes.taskmanager.cpu and pod memory from the
# *.memory.process.size options below.
/opt/flink/bin/flink run-application \
  --target kubernetes-application \
  -Dkubernetes.namespace=default \
  -Dkubernetes.service-account=flink-service-account \
  -Dkubernetes.cluster-id=flink-k8s-cluster \
  -Dkubernetes.container.image=k8s-docker-registry-node:5000/flink-jar:v2x \
  -Dstate.checkpoints.dir=hdfs://hdfs-namenode-service:9000/flink/checkpoints \
  -Dstate.savepoints.dir=hdfs://hdfs-namenode-service:9000/flink/savepoints \
  -Dexecution.checkpointing.interval=120s \
  -Dexecution.checkpointing.mode=EXACTLY_ONCE \
  -Dstate.backend=filesystem \
  -Dkubernetes.rest-service.exposed.type=NodePort \
  -Drestart-strategy=failure-rate \
  -Drestart-strategy.failure-rate.delay=1s \
  -Drestart-strategy.failure-rate.failure-rate-interval=5s \
  -Drestart-strategy.failure-rate.max-failures-per-interval=1 \
  -Dtaskmanager.memory.process.size=2048m \
  -Djobmanager.memory.process.size=1024m \
  -Dkubernetes.taskmanager.cpu=1 \
  -Dkubernetes.jobmanager.replicas=1 \
  -Dtaskmanager.numberOfTaskSlots=4 \
  -c com.xxx.xxx.xxx.xxx \
  local:///opt/v2x-1.0-SNAPSHOT.jar
问题记录
User "system:serviceaccount:default:default" cannot get resource "services" in API group "" in the namespace "default"
参考:https://cloud.tencent.com/developer/ask/sof/1920490/answer/2610301
# ClusterRole that lets the bound service account read and modify the
# Kubernetes objects listed below. Each resource is listed under the API group
# it actually belongs to; a resource listed under the wrong group grants nothing.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  # ClusterRole is cluster-scoped, so it carries no namespace.
  name: service-reader
rules:
  - apiGroups: [""] # "" indicates the core API group
    resources: ["services", "configmaps", "events", "endpoints", "persistentvolumes", "persistentvolumeclaims"]
    verbs: ["get", "watch", "list", "create", "update", "patch"]
  - apiGroups: ["apps"]
    resources: ["deployments"]
    verbs: ["get", "watch", "list", "create", "update", "patch"]
  # storageclasses live in storage.k8s.io; the original listing under "" / "apps" had no effect.
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "watch", "list", "create", "update", "patch"]
# Bind the service-reader ClusterRole to the "default" service account of the
# "default" namespace.
kubectl create clusterrolebinding service-reader-pod \
  --clusterrole=service-reader \
  --serviceaccount=default:default
root@flink-jobmanager-0:/opt/flink/bin# /opt/flink/bin/flink run-application --target kubernetes-application -Dkubernetes.namespace=default -Dkubernetes.service-account=flink-service-account -Dkubernetes.cluster-id=flink-k8s-cluster -Dkubernetes.container.image=k8s-docker-registry-node:5000/flink-jar:v2x -Dstate.checkpoints.dir=hdfs://hdfs-namenode-service:9000/flink/checkpoints -Dstate.savepoints.dir=hdfs://hdfs-namenode-service:9000/flink/savepoints -Dexecution.checkpointing.interval=120s -Dexecution.checkpointing.mode=EXACTLY_ONCE -Dstate.backend=filesystem -Dkubernetes.rest-service.exposed.type=NodePort -Drestart-strategy=failure-rate -Drestart-strategy.failure-rate.delay=1s -Drestart-strategy.failure-rate.failure-rate-interval=5s -Drestart-strategy.failure-rate.max-failures-per-interval=1 -Dtaskmanager.memory.process.size=2048m -Djobmanager.memory.process.size=1024m -Dexternal-resource.limits.kubernetes.cpu=2000m -Dexternal-resource.limits.kubernetes.memory=2Gi -Dexternal-resource.requests.kubernetes.cpu=1000m -Dexternal-resource.requests.kubernetes.memory=1Gi -Dkubernetes.taskmanager.cpu=1 -Dkubernetes.jobmanager.replicas=1 -Dtaskmanager.numberOfTaskSlots=4 -c com.nufront.bigdata.v2x.job.ExpressCarV2xRTData2PlatformTestEnv1111 local:///opt/v2x-1.0-SNAPSHOT.jar
...
2022-12-02 16:16:56,929 INFO org.apache.flink.kubernetes.KubernetesClusterDescriptor [] - Create flink application cluster flink-k8s-cluster successfully, JobManager Web Interface: http://10.1.0.1:63925
# List the jobs of the application cluster identified by kubernetes.cluster-id.
$ /opt/flink/bin/flink list --target kubernetes-application -Dkubernetes.cluster-id=flink-k8s-cluster
# Cancel a job of that cluster; replace <jobId> with an ID taken from the
# output of the list command.
$ /opt/flink/bin/flink cancel --target kubernetes-application -Dkubernetes.cluster-id=flink-k8s-cluster <jobId>
在 flink pod 中部署完 flink on k8s 无法连接以下路径
savepoint 恢复
# Start the application cluster and restore the job state from the savepoint
# given with -s.
#
# NOTE(review): the -s path is under /flink-savepoints/, while
# state.savepoints.dir below is /flink/savepoints — confirm where the
# savepoint was actually written.
#
# The -Dexternal-resource.{limits,requests}.kubernetes.{cpu,memory} flags were
# removed: they are not Flink configuration options (the external resource
# framework only reads external-resource.<name>.* for names listed in
# `external-resources`, which is not set here), so they had no effect.
/opt/flink/bin/flink run-application --target kubernetes-application \
  -s hdfs://hdfs-namenode-service:9000/flink-savepoints/savepoint-25b5a1-6fd30c8ce08b \
  -Dkubernetes.namespace=default \
  -Dkubernetes.service-account=flink-service-account \
  -Dkubernetes.cluster-id=flink-v2x-doris \
  -Dkubernetes.container.image=k8s-docker-registry-node:5000/flink-jar:v2x \
  -Dstate.checkpoints.dir=hdfs://hdfs-namenode-service:9000/flink/checkpoints \
  -Dstate.savepoints.dir=hdfs://hdfs-namenode-service:9000/flink/savepoints \
  -Dexecution.checkpointing.interval=120s \
  -Dexecution.checkpointing.mode=EXACTLY_ONCE \
  -Dstate.backend=filesystem \
  -Dkubernetes.rest-service.exposed.type=NodePort \
  -Drestart-strategy=failure-rate \
  -Drestart-strategy.failure-rate.delay=1s \
  -Drestart-strategy.failure-rate.failure-rate-interval=5s \
  -Drestart-strategy.failure-rate.max-failures-per-interval=1 \
  -Dtaskmanager.memory.process.size=2048m \
  -Djobmanager.memory.process.size=1024m \
  -Dkubernetes.taskmanager.cpu=1 \
  -Dkubernetes.jobmanager.replicas=1 \
  -Dtaskmanager.numberOfTaskSlots=4 \
  -c com.xxx.xxx.xxx \
  local:///opt/v2x-1.0-SNAPSHOT.jar
更多推荐
已为社区贡献65条内容
所有评论(0)