1

需求

因为目前服务器规模较小,使用 Zabbix、Nagios 等开源监控系统的必要性并不高,再加上配置和维护的时间成本,所以决定用自己编写的脚本,配合 SaltStack 来实现监控。
监控原理很简单,server端负责处理监控信息,agent 端负责收集信息,并统一发送到服务器端。

服务器端

脚本目录
├── weixin.py
├── __init__.py
└── main.py

main.py

#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import time, socket, threading,json
from weixin import senddata,gettoken

def tcplink(sock, addr):
    """Serve one agent connection, passing every received chunk to handler().

    Args:
        sock: connected socket as returned by ``accept()``.
        addr: ``(host, port)`` tuple of the peer; used only for log output.

    Returns:
        The last chunk that was received, or an empty dict when the peer
        sent nothing before closing (or sent ``'exit'`` first).
    """
    print('New Connection from %s:%s...' % addr)
    res = {}
    try:
        while True:
            # NOTE: each recv() chunk is treated as one complete message, so a
            # payload longer than 1024 bytes reaches handler() in pieces.
            data = sock.recv(1024)
            if data == 'exit' or not data:
                break
            res = data
            handler(res)
    finally:
        # Close the socket even when recv() or handler() raises, so a bad
        # message cannot leak the connection.
        sock.close()
        print('Connection from %s:%s closed.' % addr)
    return res

# Error reporting
def report(data):
    """Send the alert lines as one text message via the weixin helpers.

    Args:
        data: iterable of strings; each becomes one newline-terminated line
            of the message body.

    The message text, the reply returned by senddata() and the original
    ``data`` are printed, in that order.
    """
    content = ''.join(line + "\n" for line in data)
    print(content)

    corpid = 'xxxxxxxxxxxx'
    corpsecret = 'xxxxxxxxxxxxxxxxx'

    # A fresh token is requested for every report.
    msg = senddata(gettoken(corpid, corpsecret), content)
    print(msg)
    print(data)


# Process an agent message and raise an alert when a threshold is exceeded
def handler(res):
    """Decode one JSON message from an agent and report it if it warrants an alert.

    Two message types are understood:
      * ``type == 1``: host resource metrics (``ip``, ``name``, ``cpu_use``,
        ``cpu_load``, ``mem_use``, ``disk_use``), compared to fixed thresholds.
      * ``type == 2``: service status; ``status == 1`` means something is down.

    Args:
        res: the raw JSON text received from the agent.

    Returns:
        True when an alert was sent through report(), False otherwise
        (including undecodable or incomplete messages).
    """
    try:
        data = json.loads(res)
    except (ValueError, TypeError) as e:
        print(e)
        print("Data type wrong.")
        return False
    # Valid JSON that is not an object (a list, a number, ...) carries no fields.
    if not isinstance(data, dict):
        print("Data type wrong.")
        return False

    m_type = data.get('type')

    # Server resource monitoring
    if m_type == 1:
        try:
            ip = data['ip']
            # Host name
            name = data['name']
            # CPU utilisation
            cpu_use = data['cpu_use']
            # CPU load (could be improved by scaling with the core count)
            cpu_load = data['cpu_load']
            # Memory utilisation
            mem_use = data['mem_use']
            # Disk utilisation
            disk_use = data['disk_use']
        except KeyError as e:
            # An incomplete message must not take the connection thread down.
            print("Missing field: %s" % e)
            return False

        message = ["ip: %s" % ip, "name: %s" % name]
        print("%s %s %s %s %s" % (ip, cpu_use, cpu_load, mem_use, disk_use))
        if cpu_use > 95:
            message.append("cpu_use: %s" % cpu_use)
        if cpu_load > 3:
            message.append("cpu_load: %s" % cpu_load)
        if mem_use > 85:
            message.append("mem_use: %s" % mem_use)
        if disk_use > 75:
            message.append("disk_use: %s" % disk_use)

        # The first two entries are the ip/name header; anything beyond them
        # is a threshold violation.
        if len(message) > 2:
            report(message)
            return True
    # Service monitoring
    elif m_type == 2:
        print("service eyes...")
        print(data)
        if data.get("status") == 1:
            report(["oops some service down!", "message: %s" % data])
            return True

    return False

if __name__=="__main__":
    # Accept agent connections on TCP port 9999, one thread per connection.
    print("Minitor Service Listening on 9999 port.")
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Allow the service to be restarted right away instead of failing with
    # "address already in use" while old connections linger in TIME_WAIT.
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        s.bind(('0.0.0.0', 9999))
        s.listen(5)
        while True:
            sock, addr = s.accept()
            t = threading.Thread(target=tcplink, args=(sock, addr))
            t.start()
    finally:
        # Release the listening socket on any exit path (Ctrl-C, bind error).
        s.close()

weixin.py

import requests
import json
import sys

def gettoken(corp_id, corp_secret):
    """Request an access token from the qyapi.weixin.qq.com gettoken endpoint.

    Args:
        corp_id: value sent as the ``corpid`` query parameter.
        corp_secret: value sent as the ``corpsecret`` query parameter.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the response body is not valid JSON.
        KeyError: the response contains no ``access_token`` field.
    """
    gettoken_url = 'https://qyapi.weixin.qq.com/cgi-bin/gettoken'

    try:
        # Let requests build and escape the query string; the timeout keeps a
        # stalled API call from blocking the calling thread forever.
        response = requests.get(
            gettoken_url,
            params={'corpid': corp_id, 'corpsecret': corp_secret},
            timeout=10
        )
        response.raise_for_status()
    except requests.RequestException as e:
        # Log and propagate: there is no token to return on failure.
        print(e)
        raise

    token_json = response.json()
    if 'access_token' not in token_json:
        raise KeyError('access_token missing from response: %s' % token_json)
    return token_json['access_token']


def senddata(access_token,content):
    """POST a text message to the qyapi.weixin.qq.com message/send endpoint.

    Args:
        access_token: token appended to the endpoint URL.
        content: message text placed in the ``text.content`` field.

    Returns:
        The response body as text.
    """
    endpoint = 'https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token=' + access_token

    payload = {
        "touser": "187xxxxxxxx|185xxxxxxxx",
        "msgtype": "text",
        "agentid": "17",
        "text": {"content": content},
        "safe": "0",
    }

    # ensure_ascii=False keeps non-ASCII characters literal in the JSON body.
    body = json.dumps(payload, ensure_ascii=False).encode(encoding='UTF8')

    return requests.post(endpoint, body).text


# Force the interpreter-wide default string encoding to UTF-8 when this module
# is imported, so implicit str/unicode conversions do not fall back to another
# codec.
default_encoding = 'utf-8'

if sys.getdefaultencoding() != default_encoding:

    # NOTE(review): reload(sys) is presumably needed to make
    # sys.setdefaultencoding callable again on Python 2 -- confirm. The builtin
    # reload() used here exists only on Python 2.
    reload(sys)

    sys.setdefaultencoding(default_encoding)

客户端 1

# monitor.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import division
import socket
import psutil
import os


# Collect host metrics
def getMonitor():
    """Collect host identity and resource usage and return them as JSON text.

    Returns:
        A JSON object string with the keys ``type`` (always 1), ``ip``,
        ``name``, ``cpu_load`` (5-minute load average), ``cpu_use`` (mean
        per-CPU utilisation sampled over 5 seconds), ``mem_use`` and
        ``disk_use`` (usage of ``/``).
    """
    # Local import: this script's import block does not include json.
    import json

    # Host identity
    name = socket.getfqdn(socket.gethostname())
    ip = socket.gethostbyname(name)

    # Memory: percentage in use. The previous available/total ratio was the
    # *free* share, which inverted the meaning of the reported value.
    mem = psutil.virtual_memory()
    mem_use = int(mem.percent)

    # CPU load: getloadavg() returns the 1, 5 and 15 minute averages.
    cpu_load = os.getloadavg()[1]

    # CPU utilisation: average the per-CPU samples over however many were
    # actually returned, so the divisor can never be None or zero.
    per_cpu = psutil.cpu_percent(interval=5, percpu=True)
    cpu_use = sum(per_cpu) / len(per_cpu) if per_cpu else 0.0

    # Disk
    disk_use = psutil.disk_usage('/').percent
    data = {
        "type": 1,
        "ip": ip,
        "name": name,
        "cpu_load": cpu_load,
        "cpu_use": cpu_use,
        "mem_use": mem_use,
        "disk_use": disk_use,
    }
    print(str(data))
    # Serialise with json so quoting and escaping are always valid JSON.
    return json.dumps(data)


# Collect first: sampling takes several seconds and there is no reason to
# hold a connection to the server open while it runs.
data = getMonitor()

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    # Connect to the monitoring server
    s.connect(('server_ip', 9999))
    # sendall() keeps writing until the whole payload is out; send() may stop
    # after a partial write.
    s.sendall(data)
except socket.error as e:
    print(e)
finally:
    s.close()

客户端 2

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import division
import socket
import os,commands,json

# Query the state of a system service managed through systemctl
def check_status(service_name):
    """Run ``sudo systemctl status <service_name>`` with stdout discarded.

    Args:
        service_name: unit name appended to the command line as-is.

    Returns:
        The value returned by ``os.system()`` for that command; callers in
        this script treat anything other than 0 as "service down".
    """
    command = 'sudo systemctl status ' + service_name + ' > /dev/null'
    return os.system(command)

# Services to monitor
service_lists = ['config.service','xxx.service','xxx.service'] 


def get_status(service_lists):
    """Check every listed service and return the result as JSON text.

    Message types used by the monitoring protocol:
        type == 1  hardware monitoring
        type == 2  service monitoring

    Args:
        service_lists: iterable of service unit names to check.

    Returns:
        A JSON object string with ``type`` 2 and ``status`` 0 when every
        service is up. When a check returns non-zero, ``status`` is 1 and
        the failing service name is added as a key with the value "down".
    """
    data = {"type": 2, "status": 0}
    for service in service_lists:
        if check_status(service) != 0:
            data[service] = "down"
            data["status"] = 1
    print(str(data))
    # Serialise with json instead of patching quotes in the dict repr, which
    # breaks as soon as a value contains a quote character.
    return json.dumps(data)

data = get_status(service_lists)

# Only contact the server when at least one service is reported down.
if json.loads(data)["status"] == 1:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Connect to the monitoring server
        s.connect(('server_ip', 9999))
        # sendall() keeps writing until the whole payload is out; send() may
        # stop after a partial write.
        s.sendall(data)
    except socket.error as e:
        print(e)
    finally:
        s.close()

运行方式

  • 客户端
    在saltstack 服务器上定时执行 监控脚本

*/5 * * * * salt '*' cmd.script salt://scripts/monitor.py python_shell=true
*/5 * * * * salt '*' cmd.script salt://scripts/monitor_service_status.py python_shell=true

  • 服务器
    将 main.py 作为系统服务(常驻进程)运行,监听 TCP 9999 端口


阿西达卡
14 声望3 粉丝

努力工作,认真生活