1. 前言:

当 node 节点上可供 request 的资源不足时,新建的 pod 将无法正常调度运行,因此可以每天检查一次,以掌握资源的使用情况。
通过收集 node 信息并进行数据处理,将采集到的 request 和 limit 信息写入数据库,再通过查询数据库进行数据展示。

一、定义模型(表)

-- ECS instance attributes, one row per instance per collection day.
-- Secondary (non-unique) indexes exist on ecs_ip and day.
-- The original declared `day` with DEFAULT '日期', which stores the literal
-- word as the column value; the word is a description, so it is a COMMENT here.
CREATE TABLE `ecs_info` (
  `ecs_ip` varchar(255) DEFAULT NULL COMMENT '',
  `ecs_InstanceId` varchar(32) DEFAULT NULL COMMENT 'ecs实例id',
  `ecs_name` varchar(255) DEFAULT NULL COMMENT 'ecs实例名称',
  `ecs_cpu` varchar(255) DEFAULT NULL COMMENT 'ecscpu',
  `ecs_memory` varchar(255) DEFAULT NULL COMMENT 'ecs内存',
  `ecs_zone` varchar(255) DEFAULT NULL COMMENT 'ecs地域',
  `day` varchar(255) NOT NULL DEFAULT '' COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `ecs_ip` (`ecs_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


-- Per-node capacity figures, one row per node per collection day.
-- Secondary (non-unique) indexes exist on node_ip and day.
-- The original declared `day` with DEFAULT '日期', which stores the literal
-- word as the column value; the word is a description, so it is a COMMENT here.
CREATE TABLE `node_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `node_cpu` varchar(255) DEFAULT NULL COMMENT 'nodecpu',
  `node_ephemeral_storage` varchar(32) DEFAULT NULL COMMENT 'node存储',
  `hugepages_1Gi` varchar(255) DEFAULT NULL COMMENT '',
  `hugepages_2Mi` varchar(255) DEFAULT NULL COMMENT '',
  `node_memory` varchar(255) DEFAULT NULL COMMENT 'node内存',
  `node_pods` varchar(255) DEFAULT NULL COMMENT 'pods',
  `day` varchar(255) NOT NULL DEFAULT '' COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


-- Per-node requests/limits for cpu, memory and ephemeral storage, each with
-- a matching *_usage column, one row per node per collection day.
-- Secondary (non-unique) indexes exist on node_ip and day.
-- The original declared `day` with DEFAULT '日期', which stores the literal
-- word as the column value; the word is a description, so it is a COMMENT here.
CREATE TABLE `node_usage_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `cpu_requests` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu',
  `cpu_requests_usage` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu使用',
  `cpu_limits` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu',
  `cpu_limits_usage` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu使用',
  `memory_requests` varchar(255) DEFAULT NULL COMMENT '请求内存',
  `memory_requests_usage` varchar(255) DEFAULT NULL COMMENT '请求内存使用',
  `memory_limits` varchar(255) DEFAULT NULL COMMENT '限制内存',
  `memory_limits_usage` varchar(255) DEFAULT NULL COMMENT '限制内存使用',
  `ephemeral_storage_requests` varchar(255) DEFAULT NULL COMMENT '请求存储',
  `ephemeral_storage_requests_usage` varchar(255) DEFAULT NULL COMMENT '',
  `ephemeral_storage_limits` varchar(255) DEFAULT NULL COMMENT '限制存储',
  `ephemeral_storage_limits_usage` varchar(255) DEFAULT NULL COMMENT '',
  `day` varchar(255) NOT NULL DEFAULT '' COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


-- Per-pod cpu/memory requests and limits, one row per pod per collection day,
-- tagged with the node and namespace the pod belongs to.
-- Secondary (non-unique) indexes exist on node_ip, namespace and day.
-- The original declared `day` with DEFAULT '日期', which stores the literal
-- word as the column value; the word is a description, so it is a COMMENT here.
CREATE TABLE `node_pod_usage_info` (
  `node_ip` varchar(255) DEFAULT NULL COMMENT 'nodeip',
  `namespace` varchar(255) DEFAULT NULL COMMENT '命名空间',
  `podname` varchar(255) DEFAULT NULL COMMENT 'pod名称',
  `cpu_requests` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu',
  `cpu_requests_usage` varchar(255) DEFAULT NULL COMMENT 'node 请求cpu使用',
  `cpu_limits` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu',
  `cpu_limits_usage` varchar(32) DEFAULT NULL COMMENT 'node 限制cpu使用',
  `memory_requests` varchar(255) DEFAULT NULL COMMENT '请求内存',
  `memory_requests_usage` varchar(255) DEFAULT NULL COMMENT '请求内存使用',
  `memory_limits` varchar(255) DEFAULT NULL COMMENT '限制内存',
  `memory_limits_usage` varchar(255) DEFAULT NULL COMMENT '限制内存使用',
  `age` varchar(255) DEFAULT NULL COMMENT 'node运行时间',
  `day` varchar(255) NOT NULL DEFAULT '' COMMENT '日期',
  `create_time` varchar(32) DEFAULT NULL COMMENT '创建时间',
  KEY `node_ip` (`node_ip`),
  KEY `namespace` (`namespace`),
  KEY `day` (`day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

二、收集node信息处理插入数据库

#!/usr/bin/env python
#coding=utf-8

import threading
import time,sys,os
import sys,subprocess
from email.mime.text import MIMEText
import smtplib
from email.mime.multipart import MIMEMultipart
from email.header import Header
import requests
import smtplib
import json,ast,yaml
import logging
from logging import handlers
from multiprocessing import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import pymysql
from apscheduler.schedulers.blocking import BlockingScheduler
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcms.request.v20190101.DescribeMetricLastRequest import DescribeMetricLastRequest
import time,json,ast,requests

from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest

client = AcsClient('', '','')
date = time.strftime("%Y-%m-%d-%H:%M:%S")


def get_ecs_info(lists):
    """Fetch ECS instance details for the given private IPs into ``ip_info_list``.

    Issues a single ``DescribeInstancesRequest`` (RegionId ``cn-hangzhou``,
    page size ``'100'``) through the module-level ``client`` and appends one
    dict per returned instance to the module-level ``ip_info_list``.

    Args:
        lists: Private IP addresses, handed unchanged to
            ``request.set_PrivateIpAddresses``.

    Raises:
        Whatever ``client.do_action_with_exception`` raises; nothing is
        caught here.
    """
    request = DescribeInstancesRequest()
    request.set_accept_format('json')
    request.add_query_param('RegionId', 'cn-hangzhou')
    request.set_PrivateIpAddresses(lists)
    # NOTE(review): only this one page is requested and read; presumably
    # instances beyond the first 100 would be missed - confirm cluster size.
    request.set_PageSize('100')
    response = client.do_action_with_exception(request)
    payload = json.loads(str(response, encoding='utf-8'))
    for instance in payload['Instances']['Instance']:
        # Only the first private IP is kept, prefixed with "cn-hangzhou.".
        private_ip = instance["VpcAttributes"]["PrivateIpAddress"]["IpAddress"][0]
        ip_info_list.append({
            "ip": "cn-hangzhou.%s" % private_ip,
            "InstanceId": instance['InstanceId'],
            "InstanceName": instance['InstanceName'],
            "Cpu": instance['Cpu'],
            "Memory": instance["Memory"],
            "ZoneId": instance["ZoneId"],
        })


def get_node_list():
    """Return the data lines of ``kubectl top node`` (header row removed).

    The command runs through the shell because it pipes into ``awk``.
    ``subprocess.run`` drains both pipes and waits for the process, whereas
    the previous ``Popen`` call read stdout only and never reaped the child.

    Returns:
        list[str]: the non-empty output lines; empty when the command
        produced nothing on stdout. stderr is captured and discarded.
    """
    completed = subprocess.run(
        "kubectl top node | awk NR!=1{'print $0'}",
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    return [line for line in completed.stdout.decode().split("\n") if line != '']

def get_node_system_data(nodename):
    """Collect the ``Capacity:`` .. ``Allocatable:`` section of one node.

    Runs ``kubectl describe node <nodename>`` filtered by ``sed`` and, for
    every output line with at least two space-separated tokens, appends
    ``{"nodename": nodename, <first token>: <second token>}`` to the
    module-level ``node_system_lists``. The first token is stored as printed.

    Args:
        nodename: node name interpolated into the shell command line.
    """
    # subprocess.run drains stdout and stderr and waits for the child; the
    # previous Popen call read stdout only and never reaped the process.
    completed = subprocess.run(
        "kubectl describe node %s | sed -n '/Capacity:/,/Allocatable:/p'" % (nodename),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    for line in completed.stdout.decode().split("\n"):
        fields = [token for token in line.split(" ") if token != '']
        if len(fields) > 1:
            node_system_lists.append({"nodename": nodename, fields[0]: fields[1]})

def get_node_usage_data(nodename):
    """Collect the ``Allocated resources:`` .. ``Events:`` section of one node.

    For each ``cpu`` / ``memory`` / ``ephemeral-storage`` line, appends to
    the module-level ``node_usage_lists`` a dict of the form
    ``{"nodename": nodename, <resource>: {"Requests": [value, pct],
    "Limits": [value, pct]}}``. ``pct`` is the third/fifth token with its
    first and last character removed.

    Args:
        nodename: node name interpolated into the shell command line.
    """
    # subprocess.run drains stdout and stderr and waits for the child; the
    # previous Popen call read stdout only and never reaped the process.
    completed = subprocess.run(
        "kubectl describe node %s | sed -n '/Allocated resources:/,/Events:/p'" % (nodename),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    for line in completed.stdout.decode().split("\n"):
        fields = [token for token in line.split(" ") if token != '']
        # Lines with fewer than five tokens used to be skipped by a bare
        # ``except: pass`` around the indexing; the explicit length check
        # skips the same lines without hiding unrelated errors.
        if len(fields) < 5:
            continue
        if fields[0] in ['cpu', 'memory', 'ephemeral-storage']:
            node_usage_lists.append({
                "nodename": nodename,
                fields[0]: {
                    'Requests': [fields[1], fields[2][1:-1]],
                    'Limits': [fields[3], fields[4][1:-1]],
                },
            })

def get_pod_usage_data(nodename):
    """Collect the ``Non-terminated Pods:`` table of one node.

    Every output line with more than ten space-separated tokens whose first
    token is not ``Namespace`` is appended to the module-level
    ``pod_usage_lists`` as a dict holding the namespace, pod name, cpu and
    memory requests/limits (each as ``[value, pct]``) and age. ``pct`` is
    the following token with its first and last character removed.

    Args:
        nodename: node name interpolated into the shell command line.
    """
    # subprocess.run drains stdout and stderr and waits for the child; the
    # previous Popen call read stdout only and never reaped the process.
    completed = subprocess.run(
        "kubectl describe node %s | sed -n '/Non-terminated Pods:/,/Allocated resources:/p'" % (nodename),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    for line in completed.stdout.decode().split("\n"):
        fields = [token for token in line.split(" ") if token != '']
        # The length check already guarantees indexes 0..10 exist, so the
        # former bare ``except: pass`` around this code was unnecessary.
        if len(fields) > 10 and fields[0] != 'Namespace':
            pod_usage_lists.append({
                "nodename": nodename,
                'Namespace': fields[0],
                'podname': fields[1],
                'cpu_Requests': [fields[2], fields[3][1:-1]],
                'cpu_Limits': [fields[4], fields[5][1:-1]],
                'mem_Requests': [fields[6], fields[7][1:-1]],
                'mem_Limits': [fields[8], fields[9][1:-1]],
                'age': fields[10],
            })
    
def t_data(lists):
    """Flatten a sequence of dicts into one dict.

    Entries are applied in order, so when a key occurs more than once the
    value from the last dict containing it is kept.
    """
    merged = {}
    for entry in lists:
        merged.update(entry)
    return merged

def count_node_data(node_name):
    """Collect capacity and usage for one node and store the merged records.

    ``node_name`` is one line of ``kubectl top node`` output; its first
    space-separated token is used as the node name. The merged capacity
    record is appended to ``node_count_system_lists`` and the merged usage
    record to ``node_count_usage_lists``.

    This function runs in one thread per node, and the collectors append to
    lists shared by all threads. Merging the whole shared list (as was done
    before) could therefore combine rows belonging to different nodes, so
    only rows tagged with this node's ``nodename`` are merged.

    Args:
        node_name: a line whose first token is the node name.
    """
    fields = [token for token in node_name.split(" ") if token != '']
    if not fields:
        # Blank line: there is no node to query.
        return
    name = fields[0]

    get_node_system_data(name)
    # list(...) takes a snapshot so concurrent appends cannot affect the scan.
    system_row = t_data(
        [row for row in list(node_system_lists) if row.get("nodename") == name]
    )
    if system_row:
        node_count_system_lists.append(system_row)

    get_node_usage_data(name)
    usage_row = t_data(
        [row for row in list(node_usage_lists) if row.get("nodename") == name]
    )
    if usage_row:
        node_count_usage_lists.append(usage_row)


def node_therad_task():
    """Run ``count_node_data`` for every entry of ``node_init``, one thread each.

    All threads are created first, then all are started, then all are
    joined, so the call returns only after every node has been processed.
    """
    workers = [
        threading.Thread(target=count_node_data, args=(line,))
        for line in node_init
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

def cpu_to_millicores(value):
    """Normalise a CPU quantity string to millicores.

    A value ending in ``m`` has the suffix stripped and stays a string
    (``"500m"`` -> ``"500"``). Any other value is read as whole cores and
    returned as ``int(value) * 1000``. A value that cannot be parsed is
    returned unchanged.
    """
    if value.endswith("m"):
        return value[:-1]
    try:
        return int(value) * 1000
    except ValueError:
        return value


def mem_to_mebibytes(value):
    """Normalise a memory quantity string to mebibytes.

    ``Mi`` values have the suffix stripped and stay strings
    (``"512Mi"`` -> ``"512"``); ``Gi`` values become ``int * 1024``. Values
    with any other suffix, or that cannot be parsed, are returned unchanged.
    """
    suffix = value[-2:]
    if suffix == "Mi":
        return value[:-2]
    if suffix == "Gi":
        try:
            return int(value[:-2]) * 1024
        except ValueError:
            return value
    return value


if __name__=="__main__":
    # ECS details returned by the cloud API.
    ip_info_list = []
    # Raw per-node / per-pod rows parsed from kubectl output.
    node_system_lists = []
    node_usage_lists = []
    pod_usage_lists = []
    # Merged per-node records and the flattened rows handed to the inserts.
    node_count_system_lists = []
    node_count_usage_lists = []
    node_count_usage_lists_insert = []
    pod_count_usage_lists = []
    pod_count_usage_lists_insert = []
    # Private IPs derived from the node names.
    iplist = []
    # Lines of `kubectl top node`, one per node.
    node_init =  get_node_list()
    for i in node_init:
        i = i.split(" ")
        i = [x for x in i if x != '']
        # Drop the first 12 characters of the node name - presumably the
        # "cn-hangzhou." prefix - to leave the bare IP.
        iplist.append(i[0][12:])
    # Query the ECS API, then collect node capacity/usage in parallel.
    get_ecs_info(iplist)
    node_therad_task()

    for i in node_count_system_lists:
        # Strip the two-character unit suffix, then scale by 1024 and 1000.
        # NOTE(review): assumes the values end in "Ki" - confirm.
        i["ephemeral-storage:"] = int(int(i["ephemeral-storage:"][:-2])/1024/1000)
        i["memory:"] = int(int(i["memory:"][:-2])/1024/1000)

    for i in node_count_usage_lists:
        node_count_usage_lists_insert.append({
            'nodename': i["nodename"],
            'cpu_Requests': i["cpu"]["Requests"][0],
            'cpu_Requests_usage': i["cpu"]["Requests"][1],
            'cpu_Limits': i["cpu"]["Limits"][0],
            'cpu_Limits_usage': i["cpu"]["Limits"][1],
            'memory_Requests': i["memory"]["Requests"][0],
            'memory_Requests_usage': i["memory"]["Requests"][1],
            'memory_Limits': i["memory"]["Limits"][0],
            'memory_Limits_usage': i["memory"]["Limits"][1],
            'ephemeral-storage_Requests': i["ephemeral-storage"]["Requests"][0],
            'ephemeral-storage_Requests_usage': i["ephemeral-storage"]["Requests"][1],
            'ephemeral-storage_Limits': i["ephemeral-storage"]["Limits"][0],
            'ephemeral-storage_Limits_usage': i["ephemeral-storage"]["Limits"][1],
        })

    for i in node_init:
        i = i.split(" ")
        i = [x for x in i if x != '']
        get_pod_usage_data(i[0])
    for i in pod_usage_lists:
        # Each field is converted on its own. Previously one bare
        # ``except: pass`` wrapped all four conversions, so a single
        # unparsable value silently left the remaining fields unconverted.
        i["mem_Limits"][0] = mem_to_mebibytes(i["mem_Limits"][0])
        i["cpu_Requests"][0] = cpu_to_millicores(i["cpu_Requests"][0])
        i["cpu_Limits"][0] = cpu_to_millicores(i["cpu_Limits"][0])
        i["mem_Requests"][0] = mem_to_mebibytes(i["mem_Requests"][0])

        pod_count_usage_lists_insert.append({
            'nodename': i["nodename"],
            'Namespace': i["Namespace"],
            'podname': i["podname"],
            'cpu_Requests': i["cpu_Requests"][0],
            'cpu_Requests_usage': i["cpu_Requests"][1],
            'cpu_Limits': i["cpu_Limits"][0],
            'cpu_Limits_usage': i["cpu_Limits"][1],
            'mem_Requests': i["mem_Requests"][0],
            'mem_Requests_usage': i["mem_Requests"][1],
            'mem_Limits': i["mem_Limits"][0],
            'mem_Limits_usage': i["mem_Limits"][1],
            'age': i["age"],
        })
    # Project-local insert helper; the third argument matches the number of
    # keys in each row dict.
    import mysql_thread_pod
    mysql_thread_pod.ThreadInsert('ecs_info','',6,ip_info_list)
    mysql_thread_pod.ThreadInsert('node_info','',7,node_count_system_lists)
    mysql_thread_pod.ThreadInsert('node_usage_info','',13,node_count_usage_lists_insert)
    mysql_thread_pod.ThreadInsert('node_pod_usage_info','',12,pod_count_usage_lists_insert)

三、从数据库读取后发送邮件


#!/usr/bin/env python
#coding=utf-8

import threading
import time,sys,os,xlwt
import sys,subprocess
import requests
import json,ast,yaml
import threading
import logging
import os,pymysql
from logging import handlers
from multiprocessing import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import send_email
import datetime

def get_node_mysql_data():
    """Load today's per-node resource summary into ``node_list``.

    Clears the module-level ``node_list``, joins ``node_usage_info`` with
    ``ecs_info`` for the day held in the module-level ``curdate_now_day``,
    and appends one dict per result row. Columns 7, 9, 12, 14 and 15 of the
    result (the *_usage percentages) are selected but not copied.

    Returns:
        list[dict]: the same ``node_list`` object, now repopulated.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed before the error propagates.
    """
    node_list.clear()
    # Keyword arguments: PyMySQL 1.x no longer accepts positional ones.
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    # Query text is unchanged from the original.
    sql = """SELECT  t2.ecs_name as ecsname,t1.node_ip as nodeip,  round(t2.ecs_cpu - round(t1.cpu_requests/1000,2),2) as '剩余cpu',  round((t2.ecs_memory - round(t1.memory_requests ))/1024,2) as '剩余内存',  100 - round(t1.ephemeral_storage_requests_usage,2) as  '剩余磁盘', t2.ecs_cpu as ecscpu,   round(t1.cpu_requests/1000,2) as cpu_requests,  round(t1.cpu_requests /1000/ t2.ecs_cpu * 100,2) as cpu_requests_usage ,        round(t1.cpu_limits / 1000,2) as cpu_limits,round(t1.cpu_limits/ 1000/t2.ecs_cpu *100 ,2) as cpu_limits_usage, t2.ecs_memory as ecsmemory,      round(t1.memory_requests ) as memory_requests,  round(t1.memory_requests/t2.ecs_memory *100,2) as memory_requests_usage,        round(t1.memory_limits) as memory_limits ,      round(t1.memory_limits/t2.ecs_memory *100,2) as memory_limits_usage,    round(t1.ephemeral_storage_requests_usage,2) as ephemeral_storage_requests_usage FROM   node_usage_info t1 LEFT JOIN ecs_info t2 ON t1.node_ip = t2.ecs_ip and t1.day = t2.day where t1.day = '%s'   GROUP BY t1.node_ip ORDER BY t1.cpu_requests_usage desc;  """%curdate_now_day
    try:
        with db.cursor() as cur:
            cur.execute(sql)
            for row in cur.fetchall():
                node_list.append({
                    "ecsname": row[0],
                    "nodeip": row[1],
                    "kcpu": row[2],
                    "kmem": row[3],
                    "kdisk": row[4],
                    "ecscpu": row[5],
                    "rcpu": row[6],
                    "lcpu": row[8],
                    "ecsmem": row[10],
                    "rmem": row[11],
                    "lmem": row[13],
                })
    finally:
        # Always release the connection, including when the query fails.
        db.close()
    return node_list

def get_ecs_info_mysql_data():
    """Return the summed ECS cpu and memory recorded for today.

    Runs ``SUM(ecs_cpu), SUM(ecs_memory)`` over ``ecs_info`` for the day
    held in the module-level ``curdate_now_day``.

    Returns:
        tuple: ``(cpu, mem)`` as returned by the driver, or
        ``(None, None)`` if the query yields no row. Previously the
        no-row case raised ``UnboundLocalError``.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed before the error propagates.
    """
    # Keyword arguments: PyMySQL 1.x no longer accepts positional ones.
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    sql = """select SUM(ecs_cpu),SUM(ecs_memory) from ecs_info where day='%s';"""%curdate_now_day
    try:
        with db.cursor() as cur:
            cur.execute(sql)
            row = cur.fetchone()
    finally:
        # Always release the connection, including when the query fails.
        db.close()
    if row is None:
        return None, None
    return row[0], row[1]


def get_pod_k8s_mysql_data():
    """Load today's per-namespace resource summary into ``pod_list``.

    Clears the module-level ``pod_list``, aggregates ``node_pod_usage_info``
    by namespace for the day in ``curdate_now_day`` (restricted to the
    namespaces listed in the query), and expresses each total as a
    percentage of the cluster totals from ``get_ecs_info_mysql_data``.

    CPU sums are divided by 1000 because the stored values are millicores;
    the earlier query divided by 1024, which understated CPU by about 2.4%.
    Memory sums are still divided by 1024.

    Returns:
        list[dict]: the same ``pod_list`` object, now repopulated.

    Raises:
        Any pymysql error from connecting or executing; the connection is
        closed before the error propagates.
    """
    pod_list.clear()
    # One round trip for both totals instead of calling the helper twice.
    cpu, mem = get_ecs_info_mysql_data()
    # Keyword arguments: PyMySQL 1.x no longer accepts positional ones.
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    db = pymysql.connect(host="127.0.0.1", user="ops", password="Pindao@ops1234", database="devops_data")
    sql = """SELECT namespace,       count(t2.node_ip) as nodeip,    round(SUM(t1.cpu_requests)/1000,2) as cpu_requests,  round(SUM(t1.cpu_requests)/1000/%s*100,2) as cpu_requests_usage,       round(SUM(t1.cpu_limits)/1000,2) as cpu_limits, round(SUM(t1.cpu_limits)/1000/%s*100,2) as cpu_limits_usage,        round(SUM(t1.memory_requests)/1024,2) as memory_requests,       round(SUM(t1.memory_requests)/%s*100,2) as memory_requests_usage,   round(SUM(t1.memory_limits)/1024,2) as memory_limits,   round(SUM(t1.memory_limits)/%s*100,2) as memory_limits_usage FROM   node_pod_usage_info t1 LEFT JOIN node_info t2 on t1.node_ip = t2.node_ip and t1.day = t2.day WHERE      t1.day = '%s' and t1.namespace in ('operate-mdm','operate-store','operate-ehr','operate-web','supply-chain','supply-chain-web','trade-marketing','trade-marketing-web') GROUP BY  t1.namespace ORDER BY cpu_requests_usage desc """%(cpu,cpu,mem,mem,curdate_now_day)
    try:
        with db.cursor() as cur:
            cur.execute(sql)
            for row in cur.fetchall():
                pod_list.append({
                    "namespace": row[0],
                    "podc": row[1],
                    "rcpu": row[2],
                    "ucpu": row[3],
                    "rmem": row[6],
                    "umem": row[7],
                    "lcpu": row[4],
                    "lucpu": row[5],
                    "lmem": row[8],
                    "lumem": row[9],
                })
    finally:
        # Always release the connection, including when the query fails.
        db.close()
    return pod_list


def node_str_info_put(sortlists):
    """Render node records as HTML table rows.

    One ``<tr>`` fragment per record is appended to the module-level
    ``node_strslist``; the return value is every fragment currently in that
    list joined with newlines, so rows from earlier calls are included.

    Args:
        sortlists: iterable of dicts with the keys ``ecsname``, ``nodeip``,
            ``kcpu``, ``kmem``, ``kdisk``, ``ecscpu``, ``rcpu``, ``lcpu``,
            ``ecsmem``, ``rmem`` and ``lmem``.
    """
    # (record key, unit suffix) in column order.
    columns = (
        ("ecsname", ""),
        ("nodeip", ""),
        ("kcpu", "c"),
        ("kmem", "G"),
        ("kdisk", "%"),
        ("ecscpu", "c"),
        ("rcpu", "c"),
        ("lcpu", "c"),
        ("ecsmem", "M"),
        ("rmem", "M"),
        ("lmem", "M"),
    )
    for record in sortlists:
        cells = [str(record[key]) + unit for key, unit in columns]
        node_strslist.append(
            "\n<tr> \n<td>" + "</td> \n<td>".join(cells) + "</td> \n</tr>"
        )
    return '\n'.join(node_strslist)

def pod_str_info_put(sortlists):
    """Render namespace records as HTML table rows.

    One ``<tr>`` fragment per record is appended to the module-level
    ``pod_strslist``; the return value is every fragment currently in that
    list joined with newlines, so rows from earlier calls are included.

    Args:
        sortlists: iterable of dicts with the keys ``namespace``, ``podc``,
            ``rcpu``, ``ucpu``, ``rmem``, ``umem``, ``lcpu``, ``lucpu``,
            ``lmem`` and ``lumem``.
    """
    # (record key, unit suffix) in column order.
    columns = (
        ("namespace", ""),
        ("podc", ""),
        ("rcpu", "c"),
        ("ucpu", "%"),
        ("rmem", "G"),
        ("umem", "%"),
        ("lcpu", "c"),
        ("lucpu", "%"),
        ("lmem", "G"),
        ("lumem", "%"),
    )
    for record in sortlists:
        cells = [str(record[key]) + unit for key, unit in columns]
        pod_strslist.append(
            "\n<tr> \n<td>" + "</td> \n<td>".join(cells) + "</td> \n</tr>"
        )
    return '\n'.join(pod_strslist)

if __name__=="__main__":
    # Yesterday's timestamp/day and the current month; none of these three
    # is referenced elsewhere in this script.
    curdate = (datetime.datetime.now()+datetime.timedelta(days=-1)).strftime("%Y/%m/%d-%H:%M:%S")
    curdate_day = (datetime.datetime.now()+datetime.timedelta(days=-1)).strftime("%Y%m%d")
    curdate_month = time.strftime("%Y%m")
    # Today's day string; the query functions filter on it.
    curdate_now_day = time.strftime("%Y%m%d")
    # Accumulators for the rendered <tr> fragments.
    node_strslist = []
    pod_strslist = []
    # Query results, filled by the get_*_mysql_data functions.
    node_list = []
    pod_list = []
    # Mail settings for the project-local send_email module.
    mail_host = "smtp.mxhichina.com"
    mail_user = ""
    mail_pass = ""
    Se = send_email.SendEmail(mail_host,mail_user,mail_pass)
    mailto_list = ['']
    title = 'k8s资源利用检查'
    get_node_mysql_data()
    get_pod_k8s_mysql_data()
    count = len(node_list)
    count2 = len(pod_list)

    # Markup fixes relative to the original template: <head> is now closed,
    # the header row of the second table ends with a complete </tr>, and a
    # stray closing </div> before </body> was dropped.
    content = """ \
<html> 
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
<title>k8s信息</title> 
</head>
<body> 
<div id="container">
<p><strong>node资源信息</strong></p> 
<p>收集时间: """ + curdate_now_day + """</p>
<p>采集条数: """ + "%s"%count + """</p>
<div id="content">
<table width="1200" border="2" bordercolor="yellow"  cellspacing="2">
<tr>
<td><strong>ecs名称</strong></td> 
<td><strong>node名称</strong></td> 
<td><strong>剩余cpu</strong></td> 
<td><strong>剩余内存</strong></td> 
<td><strong>剩余磁盘</strong></td> 
<td><strong>ecscpu</strong></td> 
<td><strong>请求cpu</strong></td> 
<td><strong>限制cpu</strong></td> 
<td><strong>ecs内存</strong></td> 
<td><strong>请求内存</strong></td> 
<td><strong>限制内存</strong></td> 
</tr> 
""" + node_str_info_put(node_list) + """
</table> 
</div> 
<p><strong>项目资源使用信息</strong></p> 
<p>收集时间: """ + curdate_now_day + """</p>
<p>采集条数: """ + "%s"%count2 + """</p>
<div>
<table width="1200" border="2" bordercolor="yellow"  cellspacing="2">
<tr>
<td><strong>命名空间</strong></td> 
<td><strong>pod数量</strong></td> 
<td><strong>请求cpu</strong></td> 
<td><strong>cpu占比</strong></td> 
<td><strong>请求内存</strong></td> 
<td><strong>内存占比</strong></td> 
<td><strong>限制cpu</strong></td> 
<td><strong>限制cpu占比</strong></td> 
<td><strong>限制内存</strong></td> 
<td><strong>限制内存占比</strong></td> 
</tr>
""" + pod_str_info_put(pod_list) + """
</table>
</div> 

</div> 
<p><strong>请查看</strong> </p> 
</body> 
</html>
"""
    Se.sendTxtMail(mailto_list, title, content)

效果如下(此处的截图未包含 node 部分):
在这里插入图片描述
在这里插入图片描述

Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐