k8s资源使用统计并邮件通知
k8s资源使用统计 目录:一、定义模型(表);二、收集node信息处理插入数据库;三、从数据库读取后发送邮件。前言:当node节点上可供request的资源不足时,新建的pod无法正常运行,因此可以每天检查一次,以掌握资源利用情况。通过收集node信息进行数据处理,将收集到的request和limit信息写入数据库,再通过查询数据库进行数据展示。一、定义模型(表):CREATE TABLE `ecs_info` (…
·
k8s资源使用统计
- 前言:
当node节点上可供request的资源不足时,新建的pod无法正常运行,因此可以每天检查一次,以掌握资源利用情况。
通过收集node信息进行数据处理,将收集到的request和limit信息写入数据库,再通过查询数据库进行数据展示。
一、定义模型(表)
-- ECS instance facts, one row per instance per collection day.
-- The report queries match `day` against a YYYYMMDD string and join on
-- `ecs_ip` = node_usage_info.node_ip.
-- NOTE(review): numeric quantities (cpu, memory) are stored as varchar; the
-- report queries rely on MySQL's implicit string-to-number conversion.
CREATE TABLE `ecs_info` (
  `ecs_ip` varchar(255) DEFAULT NULL COMMENT '',
  `ecs_InstanceId` varchar(32) DEFAULT NULL COMMENT 'ecs实例id',
  `ecs_name` varchar(255) DEFAULT NULL COMMENT 'ecs实例名称',
  `ecs_cpu` varchar(255) DEFAULT NULL COMMENT 'ecscpu',
  `ecs_memory` varchar(255) DEFAULT NULL COMMENT 'ecs内存',
  `ecs_zone` varchar(255) DEFAULT NULL COMMENT 'ecs地域',
  -- Was DEFAULT '日期': the description text had been declared as the default
  -- value, so a row inserted without a day silently got the literal word
  -- "date" and never matched any report query. It is a COMMENT.
  `day` varchar(255) NOT NULL COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `ecs_ip` (`ecs_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Node capacity figures, one row per node per collection day.
-- Joined by the namespace report on (`node_ip`, `day`).
CREATE TABLE `node_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `node_cpu` varchar(255) DEFAULT NULL COMMENT 'nodecpu',
  `node_ephemeral_storage` varchar(32) DEFAULT NULL COMMENT 'node存储',
  `hugepages_1Gi` varchar(255) DEFAULT NULL COMMENT '',
  `hugepages_2Mi` varchar(255) DEFAULT NULL COMMENT '',
  `node_memory` varchar(255) DEFAULT NULL COMMENT 'node内存',
  `node_pods` varchar(255) DEFAULT NULL COMMENT 'pods',
  -- Was DEFAULT '日期': the description text had been declared as the default
  -- value instead of as the column COMMENT.
  `day` varchar(255) NOT NULL COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Per-node requests/limits totals, one row per node per collection day.
-- The node report reads this table filtered by `day` (YYYYMMDD string).
-- NOTE(review): quantities and percentages are stored as varchar; the report
-- query does arithmetic on them via implicit conversion.
CREATE TABLE `node_usage_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `cpu_requests` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu',
  `cpu_requests_usage` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu使用',
  `cpu_limits` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu',
  `cpu_limits_usage` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu使用',
  `memory_requests` varchar(255) DEFAULT NULL COMMENT '请求内存',
  `memory_requests_usage` varchar(255) DEFAULT NULL COMMENT '请求内存使用',
  `memory_limits` varchar(255) DEFAULT NULL COMMENT '限制内存',
  `memory_limits_usage` varchar(255) DEFAULT NULL COMMENT '限制内存使用',
  `ephemeral_storage_requests` varchar(255) DEFAULT NULL COMMENT '请求存储',
  `ephemeral_storage_requests_usage` varchar(255) DEFAULT NULL COMMENT '',
  `ephemeral_storage_limits` varchar(255) DEFAULT NULL COMMENT '限制存储',
  `ephemeral_storage_limits_usage` varchar(255) DEFAULT NULL COMMENT '',
  -- Was DEFAULT '日期': the description text had been declared as the default
  -- value instead of as the column COMMENT.
  `day` varchar(255) NOT NULL COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Per-pod requests/limits, one row per pod per collection day.
-- The namespace report aggregates this table by `namespace` for one `day`.
CREATE TABLE `node_pod_usage_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `namespace` varchar(255) DEFAULT NULL COMMENT '命名空间',
  `podname` varchar(255) DEFAULT NULL COMMENT 'pod名称',
  `cpu_requests` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu',
  `cpu_requests_usage` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu使用',
  `cpu_limits` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu',
  `cpu_limits_usage` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu使用',
  `memory_requests` varchar(255) DEFAULT NULL COMMENT '请求内存',
  `memory_requests_usage` varchar(255) DEFAULT NULL COMMENT '请求内存使用',
  `memory_limits` varchar(255) DEFAULT NULL COMMENT '限制内存',
  `memory_limits_usage` varchar(255) DEFAULT NULL COMMENT '限制内存使用',
  `age` varchar(255) DEFAULT NULL COMMENT 'node运行时间',
  -- Was DEFAULT '日期': the description text had been declared as the default
  -- value instead of as the column COMMENT.
  `day` varchar(255) NOT NULL COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `namespace` (`namespace`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
二、收集node信息处理插入数据库
#!/usr/bin/env python
#coding=utf-8
import threading
import time,sys,os
import sys,subprocess
from email.mime.text import MIMEText
import smtplib
from email.mime.multipart import MIMEMultipart
from email.header import Header
import requests
import smtplib
import json,ast,yaml
import logging
from logging import handlers
from multiprocessing import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import pymysql
from apscheduler.schedulers.blocking import BlockingScheduler
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcms.request.v20190101.DescribeMetricLastRequest import DescribeMetricLastRequest
import time,json,ast,requests
from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest
# Shared Aliyun SDK client used by get_ecs_info(). All three constructor
# arguments are blank in this listing and must be filled in before use
# (presumably access key id, access key secret and region id -- confirm
# against the SDK documentation).
client = AcsClient('', '','')
# Timestamp string captured once, when the script is loaded.
date = time.strftime("%Y-%m-%d-%H:%M:%S")
def get_ecs_info(lists):
    """Fetch ECS instance details for the given private IPs and record them.

    Sends a single DescribeInstances request (RegionId ``cn-hangzhou``, page
    size 100) through the module-level ``client`` and appends one dict per
    returned instance to the module-level ``ip_info_list``.

    Args:
        lists: private IP addresses, handed unchanged to
            ``request.set_PrivateIpAddresses``.

    Returns:
        None. Results are appended to ``ip_info_list`` with the keys
        ``ip``, ``InstanceId``, ``InstanceName``, ``Cpu``, ``Memory``, ``ZoneId``.

    Raises:
        Anything raised by ``client.do_action_with_exception`` or by JSON
        decoding; nothing is caught here.
    """
    request = DescribeInstancesRequest()
    request.set_accept_format('json')
    request.add_query_param('RegionId', 'cn-hangzhou')
    request.set_PrivateIpAddresses(lists)
    # NOTE(review): only one page of up to 100 instances is read; there is no
    # paging loop, so larger clusters would be truncated.
    request.set_PageSize('100')
    response = client.do_action_with_exception(request)
    instances = json.loads(str(response, encoding='utf-8'))['Instances']['Instance']
    for inst in instances:
        addresses = inst["VpcAttributes"]["PrivateIpAddress"]["IpAddress"]
        if not addresses:
            # Without a private IP the instance cannot be keyed to a node
            # name; skip it instead of failing the whole run with IndexError.
            continue
        ip_info_list.append({
            # Prefixed so that it equals the node name that the caller
            # strips the same prefix from.
            "ip": "cn-hangzhou.%s" % addresses[0],
            "InstanceId": inst['InstanceId'],
            "InstanceName": inst['InstanceName'],
            "Cpu": inst['Cpu'],
            "Memory": inst["Memory"],
            "ZoneId": inst["ZoneId"],
        })
def get_node_list():
    """Return the data rows printed by ``kubectl top node``.

    The header row is removed by the ``awk`` filter in the shell pipeline and
    empty lines are dropped here.

    Returns:
        list[str]: one raw, whitespace-separated output line per node.
    """
    p = subprocess.Popen("kubectl top node | awk NR!=1{'print $0'}",stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
    # communicate() drains stdout and stderr together and waits for the child.
    # Reading only stdout could block once the unread stderr pipe filled up,
    # and left the process unreaped.
    out, _ = p.communicate()
    return [line for line in out.decode().split("\n") if line != '']
def get_node_system_data(nodename):
    """Collect the ``Capacity:`` section of ``kubectl describe node``.

    Every output line between ``Capacity:`` and ``Allocatable:`` that has at
    least two whitespace-separated tokens is appended to the module-level
    ``node_system_lists`` as ``{"nodename": nodename, <token0>: <token1>}``.
    The first token is stored exactly as printed, so keys keep their trailing
    colon (for example ``"cpu:"``).

    Args:
        nodename: node name interpolated into the kubectl command line.

    Returns:
        None. Results are appended to ``node_system_lists``.
    """
    p = subprocess.Popen("kubectl describe node %s | sed -n '/Capacity:/,/Allocatable:/p'"%(nodename),stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
    # communicate() drains both pipes and reaps the child; the previous
    # stdout-only read never consumed stderr and never waited for the process.
    out, _ = p.communicate()
    for line in out.decode().split("\n"):
        tokens = [x for x in line.split(" ") if x != '']
        if len(tokens) > 1:
            node_system_lists.append({"nodename":nodename,tokens[0]:tokens[1]})
def get_node_usage_data(nodename):
    """Collect the ``Allocated resources:`` totals of one node.

    For each ``cpu``, ``memory`` and ``ephemeral-storage`` row the request and
    limit columns are appended to the module-level ``node_usage_lists`` as
    ``{"nodename": nodename, <resource>: {"Requests": [value, pct],
    "Limits": [value, pct]}}``. The percentage tokens have their first and
    last character removed (the surrounding parentheses in kubectl output).

    Args:
        nodename: node name interpolated into the kubectl command line.

    Returns:
        None. Results are appended to ``node_usage_lists``.
    """
    p = subprocess.Popen("kubectl describe node %s | sed -n '/Allocated resources:/,/Events:/p'"%(nodename),stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
    # communicate() drains both pipes and reaps the child process.
    out, _ = p.communicate()
    for line in out.decode().split("\n"):
        tokens = [x for x in line.split(" ") if x != '']
        # An explicit length check replaces the former bare ``except: pass``:
        # short or empty lines are skipped as before, but unrelated errors
        # are no longer silently swallowed.
        if len(tokens) >= 5 and tokens[0] in ['cpu','memory','ephemeral-storage']:
            node_usage_lists.append({"nodename":nodename,tokens[0]:{'Requests':[tokens[1],tokens[2][1:-1]],'Limits':[tokens[3],tokens[4][1:-1]]}})
def get_pod_usage_data(nodename):
    """Collect the per-pod rows of the ``Non-terminated Pods:`` section.

    Every line with more than ten whitespace-separated tokens, except the
    header row starting with ``Namespace``, is appended to the module-level
    ``pod_usage_lists``. Percentage tokens have their first and last
    character removed (the surrounding parentheses in kubectl output).

    Args:
        nodename: node name interpolated into the kubectl command line.

    Returns:
        None. Results are appended to ``pod_usage_lists``.
    """
    p = subprocess.Popen("kubectl describe node %s | sed -n '/Non-terminated Pods:/,/Allocated resources:/p'"%(nodename),stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
    # communicate() drains both pipes and reaps the child process.
    out, _ = p.communicate()
    for line in out.decode().split("\n"):
        t = [x for x in line.split(" ") if x != '']
        # len(t) > 10 already guarantees indexes 0..10 exist, so the former
        # bare ``except: pass`` could only hide unrelated errors; it is removed.
        if len(t) > 10 and t[0] != 'Namespace':
            pod_usage_lists.append({"nodename":nodename,'Namespace':t[0],'podname':t[1],'cpu_Requests':[t[2],t[3][1:-1]],'cpu_Limits':[t[4],t[5][1:-1]],'mem_Requests':[t[6],t[7][1:-1]],'mem_Limits':[t[8],t[9][1:-1]],'age':t[10]})
def t_data(lists):
    """Flatten a sequence of dicts into a single dict.

    Entries are applied in order, so a key that occurs more than once ends up
    with the value from the last entry that carries it.

    Args:
        lists: iterable of dict-like objects exposing ``items()``.

    Returns:
        dict: the merged key/value pairs (empty for empty input).
    """
    merged = {}
    for entry in lists:
        merged.update(entry.items())
    return merged
def count_node_data(node_name):
    """Collect and merge the system and usage figures for one node.

    Runs ``get_node_system_data`` and ``get_node_usage_data`` for the node and
    appends one merged dict each to the module-level
    ``node_count_system_lists`` and ``node_count_usage_lists``.

    The collectors append into accumulators shared by all worker threads.
    Merging the whole accumulator (as was done before) let entries written
    concurrently for another node override this node's values, including
    ``nodename``. Only entries tagged with this node's name are merged now.

    Args:
        node_name: one raw ``kubectl top node`` output line; the first
            space-separated token is the node name.

    Returns:
        None.
    """
    tokens = [x for x in node_name.split(" ") if x != '']
    if not tokens:
        # Blank line: nothing to collect.
        return
    name = tokens[0]

    get_node_system_data(name)
    # list(...) takes a snapshot so other threads may keep appending.
    own_system = [d for d in list(node_system_lists) if d.get("nodename") == name]
    if own_system:
        node_count_system_lists.append(t_data(own_system))

    get_node_usage_data(name)
    own_usage = [d for d in list(node_usage_lists) if d.get("nodename") == name]
    if own_usage:
        node_count_usage_lists.append(t_data(own_usage))
def node_therad_task():
    """Run ``count_node_data`` for every entry of ``node_init`` in parallel.

    One thread is created per entry of the module-level ``node_init``; all
    threads are started first and then joined, so the call returns only after
    every worker has finished.
    """
    workers = [
        threading.Thread(target=count_node_data, args=(entry,))
        for entry in node_init
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def _to_millicores(value):
    """Normalise a kubectl CPU quantity: '250m' -> '250', '2' -> 2000."""
    if value[-1] == "m":
        return value[:-1]
    return int(value) * 1000


def _to_mebibytes(value):
    """Normalise a kubectl memory quantity: '2Gi' -> 2048, '170Mi' -> '170'.

    Any other form (for example a bare '0') is returned unchanged.
    """
    if value[-2:] == "Gi":
        return int(value[:-2]) * 1024
    if value[-2:] == "Mi":
        return value[:-2]
    return value


def _normalise_quantity(pod, key, convert):
    """Convert ``pod[key][0]`` in place, leaving it untouched if unparsable."""
    try:
        pod[key][0] = convert(pod[key][0])
    except (ValueError, IndexError, TypeError):
        # Best effort: an unexpected quantity format is stored as collected.
        pass


if __name__=="__main__":
    # ECS details fetched from the Aliyun API (filled by get_ecs_info)
    ip_info_list = []
    # Raw per-line results parsed from kubectl (filled by the get_*_data helpers)
    node_system_lists = []
    node_usage_lists = []
    pod_usage_lists = []
    # Merged per-node records and the flattened rows that get inserted
    node_count_system_lists = []
    node_count_usage_lists = []
    node_count_usage_lists_insert = []
    pod_count_usage_lists = []
    pod_count_usage_lists_insert = []
    # Private IPs of the nodes
    iplist = []
    # Raw `kubectl top node` lines, one per node
    node_init = get_node_list()
    # The first token of each line is the node name; drop the "cn-hangzhou."
    # prefix to get the private IP (was a bare magic 12).
    for line in node_init:
        tokens = [x for x in line.split(" ") if x != '']
        iplist.append(tokens[0][len("cn-hangzhou."):])
    # Query ECS details for those IPs
    get_ecs_info(iplist)
    node_therad_task()
    for node in node_count_system_lists:
        # Strip the two-character unit suffix and scale down.
        # NOTE(review): the divisor mixes 1024 and 1000 -- confirm the intended unit.
        node["ephemeral-storage:"] = int(int(node["ephemeral-storage:"][:-2])/1024/1000)
        node["memory:"] = int(int(node["memory:"][:-2])/1024/1000)
    for usage in node_count_usage_lists:
        node_count_usage_lists_insert.append({
            'nodename': usage["nodename"],
            'cpu_Requests': usage["cpu"]["Requests"][0],
            'cpu_Requests_usage': usage["cpu"]["Requests"][1],
            'cpu_Limits': usage["cpu"]["Limits"][0],
            'cpu_Limits_usage': usage["cpu"]["Limits"][1],
            'memory_Requests': usage["memory"]["Requests"][0],
            'memory_Requests_usage': usage["memory"]["Requests"][1],
            'memory_Limits': usage["memory"]["Limits"][0],
            'memory_Limits_usage': usage["memory"]["Limits"][1],
            'ephemeral-storage_Requests': usage["ephemeral-storage"]["Requests"][0],
            'ephemeral-storage_Requests_usage': usage["ephemeral-storage"]["Requests"][1],
            'ephemeral-storage_Limits': usage["ephemeral-storage"]["Limits"][0],
            'ephemeral-storage_Limits_usage': usage["ephemeral-storage"]["Limits"][1],
        })
    for line in node_init:
        tokens = [x for x in line.split(" ") if x != '']
        get_pod_usage_data(tokens[0])
    for pod in pod_usage_lists:
        # Each field is normalised on its own. Previously one try/except
        # wrapped all four conversions, so a single unparsable value left the
        # fields after it unconverted.
        _normalise_quantity(pod, "mem_Limits", _to_mebibytes)
        _normalise_quantity(pod, "cpu_Requests", _to_millicores)
        _normalise_quantity(pod, "cpu_Limits", _to_millicores)
        _normalise_quantity(pod, "mem_Requests", _to_mebibytes)
        pod_count_usage_lists_insert.append({
            'nodename': pod["nodename"],
            'Namespace': pod["Namespace"],
            'podname': pod["podname"],
            'cpu_Requests': pod["cpu_Requests"][0],
            'cpu_Requests_usage': pod["cpu_Requests"][1],
            'cpu_Limits': pod["cpu_Limits"][0],
            'cpu_Limits_usage': pod["cpu_Limits"][1],
            'mem_Requests': pod["mem_Requests"][0],
            'mem_Requests_usage': pod["mem_Requests"][1],
            'mem_Limits': pod["mem_Limits"][0],
            'mem_Limits_usage': pod["mem_Limits"][1],
            'age': pod["age"],
        })
    # Project-local insert helper; the third argument equals the number of
    # keys in each row dict passed as the fourth argument.
    import mysql_thread_pod
    mysql_thread_pod.ThreadInsert('ecs_info','',6,ip_info_list)
    mysql_thread_pod.ThreadInsert('node_info','',7,node_count_system_lists)
    mysql_thread_pod.ThreadInsert('node_usage_info','',13,node_count_usage_lists_insert)
    mysql_thread_pod.ThreadInsert('node_pod_usage_info','',12,pod_count_usage_lists_insert)
三、从数据库读取后发送邮件
#!/usr/bin/env python
#coding=utf-8
import threading
import time,sys,os,xlwt
import sys,subprocess
import requests
import json,ast,yaml
import threading
import logging
import os,pymysql
from logging import handlers
from multiprocessing import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import send_email
import datetime
def get_node_mysql_data():
    """Load the per-node report rows for ``curdate_now_day`` into ``node_list``.

    Clears the module-level ``node_list``, runs the node usage query joined
    with ``ecs_info`` and appends one dict per result row.

    Returns:
        list[dict]: the module-level ``node_list`` with the keys ``ecsname``,
        ``nodeip``, ``kcpu``, ``kmem``, ``kdisk``, ``ecscpu``, ``rcpu``,
        ``lcpu``, ``ecsmem``, ``rmem``, ``lmem``.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed in all cases.
    """
    node_list.clear()
    # Keyword arguments: PyMySQL 1.0+ rejects positional connect() parameters.
    # NOTE(review): credentials are hard-coded here; consider configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    # The day is bound as a query parameter instead of being formatted in.
    # ORDER BY adds 0 so the stored text (e.g. '48%') is compared as a number;
    # ordering the raw varchar sorted lexicographically ('9%' above '85%').
    # NOTE(review): GROUP BY t1.node_ip with non-aggregated columns relies on
    # ONLY_FULL_GROUP_BY being disabled.
    sql = """SELECT t2.ecs_name as ecsname,
       t1.node_ip as nodeip,
       round(t2.ecs_cpu - round(t1.cpu_requests/1000,2),2) as '剩余cpu',
       round((t2.ecs_memory - round(t1.memory_requests ))/1024,2) as '剩余内存',
       100 - round(t1.ephemeral_storage_requests_usage,2) as '剩余磁盘',
       t2.ecs_cpu as ecscpu,
       round(t1.cpu_requests/1000,2) as cpu_requests,
       round(t1.cpu_requests /1000/ t2.ecs_cpu * 100,2) as cpu_requests_usage,
       round(t1.cpu_limits / 1000,2) as cpu_limits,
       round(t1.cpu_limits/ 1000/t2.ecs_cpu *100 ,2) as cpu_limits_usage,
       t2.ecs_memory as ecsmemory,
       round(t1.memory_requests ) as memory_requests,
       round(t1.memory_requests/t2.ecs_memory *100,2) as memory_requests_usage,
       round(t1.memory_limits) as memory_limits,
       round(t1.memory_limits/t2.ecs_memory *100,2) as memory_limits_usage,
       round(t1.ephemeral_storage_requests_usage,2) as ephemeral_storage_requests_usage
FROM node_usage_info t1
LEFT JOIN ecs_info t2 ON t1.node_ip = t2.ecs_ip and t1.day = t2.day
where t1.day = %s
GROUP BY t1.node_ip
ORDER BY t1.cpu_requests_usage + 0 desc; """
    try:
        cur = db.cursor()
        cur.execute(sql, (curdate_now_day,))
        for row in cur.fetchall():
            # Indexes follow the SELECT list order above.
            node_list.append({"ecsname":row[0],"nodeip":row[1],"kcpu":row[2],"kmem":row[3],"kdisk":row[4],"ecscpu":row[5],"rcpu":row[6],"lcpu":row[8],"ecsmem":row[10],"rmem":row[11],"lmem":row[13]})
    finally:
        db.close()
    return node_list
def get_ecs_info_mysql_data():
    """Return the summed ECS CPU and memory recorded for ``curdate_now_day``.

    Returns:
        tuple: ``(cpu, mem)`` as returned by ``SUM(ecs_cpu)`` and
        ``SUM(ecs_memory)``. Both are ``None`` when the query yields no value.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed in all cases.
    """
    # Keyword arguments: PyMySQL 1.0+ rejects positional connect() parameters.
    # NOTE(review): credentials are hard-coded here; consider configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    # Bound up front so the return statement cannot hit an unbound name.
    cpu = mem = None
    try:
        cur = db.cursor()
        # The day is bound as a query parameter instead of being formatted in.
        cur.execute("select SUM(ecs_cpu),SUM(ecs_memory) from ecs_info where day=%s;", (curdate_now_day,))
        row = cur.fetchone()
        if row is not None:
            cpu, mem = row[0], row[1]
    finally:
        db.close()
    return cpu,mem
def get_pod_k8s_mysql_data():
    """Load the per-namespace report rows for ``curdate_now_day`` into ``pod_list``.

    Clears the module-level ``pod_list``, fetches the ECS CPU/memory totals
    once via ``get_ecs_info_mysql_data`` and aggregates
    ``node_pod_usage_info`` by namespace for a fixed list of namespaces.

    Returns:
        list[dict]: the module-level ``pod_list`` with the keys ``namespace``,
        ``podc``, ``rcpu``, ``ucpu``, ``rmem``, ``umem``, ``lcpu``, ``lucpu``,
        ``lmem``, ``lumem``.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed in all cases.
    """
    pod_list.clear()
    # One call and unpack; calling the helper twice meant two connections
    # and two identical queries.
    cpu, mem = get_ecs_info_mysql_data()
    # Keyword arguments: PyMySQL 1.0+ rejects positional connect() parameters.
    # NOTE(review): credentials are hard-coded here; consider configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    # CPU values are stored in millicores, so cores = value / 1000, matching
    # the per-node report query (this query used to divide by 1024).
    # Memory values are in Mi, so / 1024 gives Gi.
    # NOTE(review): count(t2.node_ip) is shown as the pod count; it counts the
    # joined node_info rows and would be inflated if node_info held more than
    # one row per node and day.
    sql = """SELECT namespace,
       count(t2.node_ip) as nodeip,
       round(SUM(t1.cpu_requests)/1000,2) as cpu_requests,
       round(SUM(t1.cpu_requests)/1000/%s*100,2) as cpu_requests_usage,
       round(SUM(t1.cpu_limits)/1000,2) as cpu_limits,
       round(SUM(t1.cpu_limits)/1000/%s*100,2) as cpu_limits_usage,
       round(SUM(t1.memory_requests)/1024,2) as memory_requests,
       round(SUM(t1.memory_requests)/%s*100,2) as memory_requests_usage,
       round(SUM(t1.memory_limits)/1024,2) as memory_limits,
       round(SUM(t1.memory_limits)/%s*100,2) as memory_limits_usage
FROM node_pod_usage_info t1
LEFT JOIN node_info t2 on t1.node_ip = t2.node_ip and t1.day = t2.day
WHERE t1.day = %s
  and t1.namespace in ('operate-mdm','operate-store','operate-ehr','operate-web','supply-chain','supply-chain-web','trade-marketing','trade-marketing-web')
GROUP BY t1.namespace
ORDER BY cpu_requests_usage desc """
    try:
        cur = db.cursor()
        # Values are bound as query parameters instead of being formatted in.
        cur.execute(sql, (cpu, cpu, mem, mem, curdate_now_day))
        for row in cur.fetchall():
            # Indexes follow the SELECT list order above.
            pod_list.append({"namespace":row[0],"podc":row[1],"rcpu":row[2],"ucpu":row[3],"rmem":row[6],"umem":row[7],"lcpu":row[4],"lucpu":row[5],"lmem":row[8],"lumem":row[9]})
    finally:
        db.close()
    return pod_list
def node_str_info_put(sortlists):
    """Render node report rows as HTML ``<tr>`` fragments.

    One fragment per entry is appended to the module-level ``node_strslist``,
    and everything accumulated in that list is returned joined by newlines.
    Because the list is shared, calling this twice repeats the earlier rows.

    Args:
        sortlists: iterable of dicts with the keys ``ecsname``, ``nodeip``,
            ``kcpu``, ``kmem``, ``kdisk``, ``ecscpu``, ``rcpu``, ``lcpu``,
            ``ecsmem``, ``rmem``, ``lmem``.

    Returns:
        str: the concatenated table rows.
    """
    import html  # local import: only the HTML rendering needs it

    for i in sortlists:
        cells = [
            "%s"%i["ecsname"],
            "%s"%i["nodeip"],
            "%sc"%str(i["kcpu"]),
            "%sG"%str(i["kmem"]),
            "%s%%"%str(i["kdisk"]),
            "%sc"%str(i["ecscpu"]),
            "%sc"%str(i["rcpu"]),
            "%sc"%str(i["lcpu"]),
            "%sM"%str(i["ecsmem"]),
            "%sM"%str(i["rmem"]),
            "%sM"%i["lmem"],
        ]
        # Values are escaped so a name containing '<' or '&' cannot break
        # the table markup.
        node_strslist.append(
            "\n<tr>\n"
            + "\n".join("<td>" + html.escape(cell) + "</td>" for cell in cells)
            + "\n</tr>"
        )
    return '\n'.join(node_strslist)
def pod_str_info_put(sortlists):
    """Render namespace report rows as HTML ``<tr>`` fragments.

    One fragment per entry is appended to the module-level ``pod_strslist``,
    and everything accumulated in that list is returned joined by newlines.
    Because the list is shared, calling this twice repeats the earlier rows.

    Args:
        sortlists: iterable of dicts with the keys ``namespace``, ``podc``,
            ``rcpu``, ``ucpu``, ``rmem``, ``umem``, ``lcpu``, ``lucpu``,
            ``lmem``, ``lumem``.

    Returns:
        str: the concatenated table rows.
    """
    import html  # local import: only the HTML rendering needs it

    for i in sortlists:
        cells = [
            "%s"%i["namespace"],
            "%s"%i["podc"],
            "%sc"%str(i["rcpu"]),
            "%s%%"%str(i["ucpu"]),
            "%sG"%str(i["rmem"]),
            "%s%%"%str(i["umem"]),
            "%sc"%str(i["lcpu"]),
            "%s%%"%str(i["lucpu"]),
            "%sG"%str(i["lmem"]),
            "%s%%"%str(i["lumem"]),
        ]
        # Values are escaped so unexpected characters cannot break the
        # table markup.
        pod_strslist.append(
            "\n<tr>\n"
            + "\n".join("<td>" + html.escape(cell) + "</td>" for cell in cells)
            + "\n</tr>"
        )
    return '\n'.join(pod_strslist)
if __name__=="__main__":
    # Yesterday's timestamp and day, and the current month.
    # NOTE(review): these three values are not used below; the queries and the
    # mail body use today's date (curdate_now_day).
    curdate = (datetime.datetime.now()+datetime.timedelta(days=-1)).strftime("%Y/%m/%d-%H:%M:%S")
    curdate_day = (datetime.datetime.now()+datetime.timedelta(days=-1)).strftime("%Y%m%d")
    curdate_month = time.strftime("%Y%m")
    # Day key (YYYYMMDD) read by the query functions
    curdate_now_day = time.strftime("%Y%m%d")
    # Accumulators for the rendered <tr> fragments
    node_strslist = []
    pod_strslist = []
    # Accumulators for the rows loaded from MySQL
    node_list = []
    pod_list = []
    # Mail settings for the project-local send_email module; user, password
    # and recipients are blank in this listing and must be filled in.
    mail_host = "smtp.mxhichina.com"
    mail_user = ""
    mail_pass = ""
    Se = send_email.SendEmail(mail_host,mail_user,mail_pass)
    mailto_list = ['']
    title = 'k8s资源利用检查'
    get_node_mysql_data()
    get_pod_k8s_mysql_data()
    count = len(node_list)
    count2 = len(pod_list)
    # Markup fixes compared with the previous body: the header row of the
    # second table ended in a broken "</tr" tag, <head> was never closed and
    # there was one </div> too many at the end.
    content = """<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>k8s信息</title>
</head>
<body>
<div id="container">
<p><strong>node资源信息</strong></p>
<p>收集时间: """ + curdate_now_day + """</p>
<p>采集条数: """ + "%s"%count + """</p>
<div id="content">
<table width="1200" border="2" bordercolor="yellow" cellspacing="2">
<tr>
<td><strong>ecs名称</strong></td>
<td><strong>node名称</strong></td>
<td><strong>剩余cpu</strong></td>
<td><strong>剩余内存</strong></td>
<td><strong>剩余磁盘</strong></td>
<td><strong>ecscpu</strong></td>
<td><strong>请求cpu</strong></td>
<td><strong>限制cpu</strong></td>
<td><strong>ecs内存</strong></td>
<td><strong>请求内存</strong></td>
<td><strong>限制内存</strong></td>
</tr>
""" + node_str_info_put(node_list) + """
</table>
</div>
<p><strong>项目资源使用信息</strong></p>
<p>收集时间: """ + curdate_now_day + """</p>
<p>采集条数: """ + "%s"%count2 + """</p>
<div>
<table width="1200" border="2" bordercolor="yellow" cellspacing="2">
<tr>
<td><strong>命名空间</strong></td>
<td><strong>pod数量</strong></td>
<td><strong>请求cpu</strong></td>
<td><strong>cpu占比</strong></td>
<td><strong>请求内存</strong></td>
<td><strong>内存占比</strong></td>
<td><strong>限制cpu</strong></td>
<td><strong>限制cpu占比</strong></td>
<td><strong>限制内存</strong></td>
<td><strong>限制内存占比</strong></td>
</tr>
""" + pod_str_info_put(pod_list) + """
</table>
</div>
</div>
<p><strong>请查看</strong> </p>
</body>
</html>
"""
    Se.sendTxtMail(mailto_list, title, content)
效果如下 此处没有截出node
更多推荐
已为社区贡献3条内容
所有评论(0)